import os
import sys
from io import StringIO

from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Create SCRIPT_FILE

# clinical_item_category_id = 13 (Discharge)
# clinical_item_category_id = 23 (Admission)

# H&P Note

# Build the psql script preamble for the STRIDE database.
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Query for all H&P notes, one row per note, ordered by author.
DATA_QUERY = SQLQuery()

# Columns reported for each note.
for column in ("author_name", "pat_id", "note_date", "note_type",
               "provider_type", "specialty", "pat_enc_csn_id"):
    DATA_QUERY.addSelect(column)

DATA_QUERY.addFrom("stride_note")
DATA_QUERY.addWhereEqual("note_type", "H&P")
DATA_QUERY.addOrderBy("author_name", dir="asc")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
# Example #2
    def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
        """Load basic information about the specified patients.
        Report patient state at given time, or default to time zero

        patientIds: Optional collection of sim_patient_id values to load;
            None applies no patient filter.
        relativeTime: Simulation time offset at which to report each
            patient's state; defaults to 0 (time zero) when None.
        conn: Optional externally managed DB connection. When None, a local
            connection is created and closed before returning.
        Returns a list of patient/state data models (dict-like), ordered by
        patient name, each annotated with state-transition lookup entries.
        May recursively reload (and record state transitions) when a
        time-based transition has already elapsed by relativeTime.
        """
        if relativeTime is None:
            relativeTime = 0  # Just look for time zero default then

        # Track whether the caller owns the connection; only close non-external ones.
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Join patient, patient-state, and state tables to find the state
            # record(s) covering the requested relative time.
            query = SQLQuery()
            query.addSelect("sp.sim_patient_id")
            query.addSelect("sp.name")
            query.addSelect("sp.age_years")
            query.addSelect("sp.gender")
            query.addSelect("s.sim_state_id")
            query.addSelect("s.name as state_name")
            query.addSelect("s.description as state_description")
            query.addSelect("sps.relative_time_start")
            query.addSelect("sps.relative_time_end")
            query.addFrom("sim_patient as sp")
            query.addFrom("sim_patient_state as sps")
            query.addFrom("sim_state as s")
            query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
            query.addWhere("sps.sim_state_id = s.sim_state_id")
            if patientIds is not None:
                query.addWhereIn("sp.sim_patient_id", patientIds)

            # Look for the state that matches the given relative time offset
            # (started at or before relativeTime, and either still open-ended
            # or not yet ended at relativeTime).
            query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
            query.openWhereOrClause()
            query.addWhere("sps.relative_time_end is null")
            query.addWhereOp("sps.relative_time_end", ">", relativeTime)
            query.closeWhereOrClause()

            query.addOrderBy("sp.name")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)

            if len(dataModels) > 0:
                # Secondary query to build lookup table of possible state transition options from patient current states
                subQuery = SQLQuery()
                subQuery.addSelect("pre_state_id")
                subQuery.addSelect("post_state_id")
                subQuery.addSelect("clinical_item_id")
                subQuery.addSelect("time_trigger")
                subQuery.addFrom("sim_state_transition as sst")
                subQuery.addWhereIn(
                    "pre_state_id",
                    columnFromModelList(dataModels, "sim_state_id"))
                subResults = DBUtil.execute(subQuery, conn=conn)

                # For each pre-state, track which clinical items or times trigger which post-states
                # postStateIdByItemIdByPreStateId: preStateId -> {itemId -> postStateId}
                # postStateIdTimeTriggerByPreStateId: preStateId -> (postStateId, timeTrigger)
                postStateIdByItemIdByPreStateId = dict()
                postStateIdTimeTriggerByPreStateId = dict()
                for preStateId, postStateId, itemId, timeTrigger in subResults:
                    if preStateId not in postStateIdByItemIdByPreStateId:
                        postStateIdByItemIdByPreStateId[preStateId] = dict()
                    postStateIdByItemIdByPreStateId[preStateId][
                        itemId] = postStateId

                    # Only record a time-based trigger when one is defined.
                    if timeTrigger is not None:
                        postStateIdTimeTriggerByPreStateId[preStateId] = (
                            postStateId, timeTrigger)

                # Record in patient result models for retrieval
                for i, dataModel in enumerate(dataModels):
                    patientId = dataModel["sim_patient_id"]
                    stateId = dataModel["sim_state_id"]

                    dataModel["postStateIdByItemId"] = dict()
                    if stateId in postStateIdByItemIdByPreStateId:
                        dataModel[
                            "postStateIdByItemId"] = postStateIdByItemIdByPreStateId[
                                stateId]
                    # NOTE(review): the default below is stored under key
                    # "postStateIdTimeTriggerByPreStateId" while the populated
                    # value uses key "postStateIdTimeTrigger" — the key names
                    # are inconsistent. The membership test further down relies
                    # on "postStateIdTimeTrigger" being ABSENT when no trigger
                    # exists, so "fixing" the default key would change behavior.
                    # Confirm intent before touching.
                    dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                    if stateId in postStateIdTimeTriggerByPreStateId:
                        dataModel[
                            "postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[
                                stateId]

                    if dataModel[
                            "relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                        # Check that we haven't passed (and should thus trigger) a time-based state transition
                        (postStateId,
                         timeTrigger) = dataModel["postStateIdTimeTrigger"]
                        preStateTime = dataModel["relative_time_start"]
                        postStateTriggerTime = (preStateTime + timeTrigger)

                        if postStateTriggerTime <= relativeTime:  # Trigger state transition just by time elapsed
                            #print >> sys.stderr, relativeTime, preStateTime, stateId, postStateTriggerTime, postStateId
                            self.recordStateTransition(patientId,
                                                       stateId,
                                                       postStateId,
                                                       postStateTriggerTime,
                                                       conn=conn)
                            # State change which can yield new triggers, so recursively reload.
                            # Small risk of infinite recusion if timeTriggers are zero. Otherwise, should converge as each recursion will update the preState relativeTimeStart
                            dataModels[i] = self.loadPatientInfo([patientId],
                                                                 relativeTime,
                                                                 conn=conn)[0]

            return dataModels
        finally:
            if not extConn:
                conn.close()
# Example #3
    def loadResults(self, patientId, relativeTime, conn=None):
        """Load all results active by the given relativeTime.

        Joins sim_patient_order (diagnostic orders) through
        sim_order_result_map to sim_result to find which results have become
        available (order time + turnaround time elapsed, and the order not
        cancelled first), then fills in state-specific result values, falling
        back to default-state values when no state-specific ones exist.
        Results with no value information at all are skipped.
        """
        # Only close the connection if we created it here.
        externalConnection = conn is not None
        if conn is None:
            conn = self.connFactory.connection()
        try:
            # First query for all expected result labels and states, without
            # state-specific values, as we may want outer-join behavior
            # against default state values.
            resultQuery = SQLQuery()
            for selectColumn in ("distinct spo.sim_state_id",
                                 "sr.sim_result_id",
                                 "sr.name",
                                 "sr.description",
                                 "sr.priority",
                                 "sr.group_string",
                                 "spo.relative_time_start",
                                 "sorm.turnaround_time"):
                resultQuery.addSelect(selectColumn)
            resultQuery.addSelect(
                "(spo.relative_time_start + sorm.turnaround_time) as result_relative_time"
            )

            for sourceTable in ("sim_patient_order as spo",
                                "sim_order_result_map as sorm",
                                "sim_result as sr"):
                resultQuery.addFrom(sourceTable)

            resultQuery.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
            resultQuery.addWhere("sorm.sim_result_id = sr.sim_result_id")
            resultQuery.addWhereEqual("spo.sim_patient_id", patientId)

            # Only unlock results if appropriate prereq orders were placed in
            # the past (and longer than the turnaround time).
            resultQuery.addWhereOp(
                "spo.relative_time_start + sorm.turnaround_time",
                "<=", relativeTime)
            # Also check that the triggering order was not cancelled before
            # the completion of the turnaround time.
            resultQuery.addWhere(
                "( spo.relative_time_end is null or spo.relative_time_start + sorm.turnaround_time <= spo.relative_time_end )"
            )

            resultQuery.addOrderBy("result_relative_time")
            resultQuery.addOrderBy("sr.priority")

            resultModels = modelListFromTable(
                DBUtil.execute(resultQuery, includeColumnNames=True,
                               conn=conn))

            # States to search for: each result's own state, plus the default
            # state to fall back on.
            stateIds = set([DEFAULT_STATE_ID])
            for resultModel in resultModels:
                stateIds.add(resultModel["sim_state_id"])

            # Second query for state-specific values.
            valueQuery = SQLQuery()
            for selectColumn in ("ssr.sim_state_id",
                                 "ssr.sim_result_id",
                                 "ssr.num_value",
                                 "ssr.num_value_noise",
                                 "ssr.text_value",
                                 "ssr.result_flag"):
                valueQuery.addSelect(selectColumn)
            # Output clinical item if result flag means something.
            valueQuery.addSelect("ssr.clinical_item_id")
            valueQuery.addFrom("sim_state_result as ssr")
            valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
            valueModels = modelListFromTable(
                DBUtil.execute(valueQuery, includeColumnNames=True,
                               conn=conn))

            # In-memory index for a rapid cross-referencing "join":
            # resultId -> {stateId -> valueModel}
            valuesByStateByResult = dict()
            for valueModel in valueModels:
                byState = valuesByStateByResult.setdefault(
                    valueModel["sim_result_id"], dict())
                byState[valueModel["sim_state_id"]] = valueModel

            # Join original results with state-specific values, or default
            # values if needed; skip results with no value info at all.
            resultValueModels = list()
            for resultModel in resultModels:
                byState = valuesByStateByResult.get(
                    resultModel["sim_result_id"])
                if byState is None:
                    # No result information available, even in default state.
                    continue
                stateId = resultModel["sim_state_id"]
                if stateId in byState:
                    # Have a state-specific value; populate that.
                    resultModel.update(byState[stateId])
                elif DEFAULT_STATE_ID in byState:
                    # No state-specific value; populate the default one.
                    resultModel.update(byState[DEFAULT_STATE_ID])
                resultValueModels.append(resultModel)

            return resultValueModels
        finally:
            if not externalConnection:
                conn.close()
# Example #4
def queryPatientEpisodes(outputFile, extractor,
                         providerDescription="Tt Pamf Med (Primary)"):
    """Query patient admission episodes for a primary provider category and
    write them out as tab-delimited text.

    outputFile: Writable file-like object that receives the tab-delimited
        episode rows (header row included).
    extractor: Data extractor utility. Kept for interface compatibility with
        existing callers; not used directly in the current query path.
    providerDescription: clinical_item description identifying the primary
        treatment-team cohort (e.g. "Tt Pamf Med (Primary)" or
        "Tt Med Univ (Primary)"). Defaults to the previously hard-coded value,
        so existing callers are unaffected.
    Returns the list of patient-episode RowItemModels that were written out.
    """
    log.info(
        "Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)"
    )

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # First pass query to get the list of patients and emergency department
        # presentation times. The provider description is bound as a DB-API
        # query parameter rather than interpolated into the SQL string, since
        # callers may now supply it (avoids SQL injection / quoting issues).
        cohortQuery = \
        """
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            adt1.pat_anon_id in
            (select patient_id from patient_item inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id where clinical_item.clinical_item_category_id = 161 AND clinical_item.description = %s) 
        and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id
        """

        print(cohortQuery, file=sys.stderr)
        cursor.execute(cohortQuery, (providerDescription, ))

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Collect basic patient ID, ED presentation date,
        # and discharge date/time for each cohort row.
        prog = ProgressDots()
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, edAdmitTime, dischargeTime) = row
            patientEpisode = RowItemModel({
                "patient_id": patientId,
                "edAdmitTime": edAdmitTime,
                "dischargeTime": dischargeTime,
                "encounter_id": encounterId,
                # Default encounter data to null in case can't find it later
                "payorTitle": None,
                "bpSystolic": None,
                "bpDiastolic": None,
                "temperature": None,
                "pulse": None,
                "respirations": None,
            })
            patientEpisodes.append(patientEpisode)
            # Keep only the first episode seen per encounter for lookup.
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[
                    patientEpisode["encounter_id"]] = patientEpisode

            prog.update()
            row = cursor.fetchone()
        prog.printStatus()

        # Second query phase to link to encounter information
        # (e.g., insurance, admitting vital signs).
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        query = SQLQuery()
        for column in ("pat_id", "pat_enc_csn_id", "title", "bp_systolic",
                       "bp_diastolic", "temperature", "pulse",
                       "respirations"):
            query.addSelect(column)
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
             temperature, pulse, respirations) = row
            if encounterId in patientEpisodeById:
                patientEpisode = patientEpisodeById[encounterId]
                if patientEpisode["payorTitle"] is None:
                    # Single encounters may have multiple payors to track
                    patientEpisode["payorTitle"] = set()
                patientEpisode["payorTitle"].add(payorTitle)
                patientEpisode["bpSystolic"] = bpSystolic
                patientEpisode["bpDiastolic"] = bpDiastolic
                patientEpisode["temperature"] = temperature
                patientEpisode["pulse"] = pulse
                patientEpisode["respirations"] = respirations
            row = cursor.fetchone()

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
# Example #5
    def _query_patient_episodes(self):
        """Query microbiology-culture patient episodes into the feature matrix.

        Builds a grouped SQL query over stride_culture_micro that, per
        (patient, order time, procedure code, organism), pivots out
        susceptibility/tested indicator columns for a fixed antibiotic panel
        plus 0/1 indicator columns for common organisms, then delegates the
        actual episode materialization to the parent
        FeatureMatrix._query_patient_episodes implementation.
        Side effect: sets self._num_reported_episodes.
        """
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        #print("RANDOM PATIENT LIST", random_patient_list)

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.
        query = SQLQuery()
        query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id')
        query.addSelect('CAST(shifted_order_time AS TIMESTAMP)')
        query.addSelect('stride_culture_micro.proc_code')
        query.addSelect('organism_name')  #one for the result

        # Experimenting
        # Antibiotics pivoted into per-drug *_Susc / *_tested indicator columns.
        # NOTE(review): 'Aztreonam.' and 'Oxacillin.' carry trailing periods —
        # presumably matching the antibiotic_name spellings in the source
        # data; confirm against stride_culture_micro before "fixing".
        susceptibility_flags = [
            'Trimethoprim/Sulfamethoxazole', 'Vancomycin', 'Penicillin',
            'Levofloxacin', 'Clindamycin', 'Ceftriaxone', 'Erythromycin',
            'Ampicillin', 'Meropenem', 'Ciprofloxacin', 'Cefepime',
            'Aztreonam.', 'Ampicillin/Sulbactam', 'Piperacillin/Tazobactam',
            'Linezolid', 'Oxacillin.', 'Cefazolin', 'Daptomycin'
        ]

        for med in susceptibility_flags:
            # Sanitize the drug name into a valid SQL column alias
            # ('/' and '.' are not legal in identifiers).
            med_col = med.replace('/', '_').replace('.', '')
            # NOTE(review): 'suseptibility' below appears to be the actual
            # (misspelled) column name in the source table — do not correct
            # the spelling without checking the schema.
            query.addSelect(
                "MAX(CASE WHEN antibiotic_name = '%s' AND (suseptibility = 'Susceptible' OR suseptibility = 'Positive') THEN 1 ELSE 0 END) AS %s_Susc"
                % (med, med_col))
            query.addSelect(
                "MAX(CASE WHEN antibiotic_name = '%s' THEN 1 ELSE 0 END) as %s_tested"
                % (med, med_col))

        # Let us look at top 10 commonly occuring bacteria
        query.addSelect(
            "CASE WHEN organism_name IS NULL THEN 0 ELSE 1 END AS BACTERIA_PRESENT"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS"
        )

        query.addFrom('stride_culture_micro')

        # Restrict to the requested lab panel and the sampled patient cohort.
        query.addWhereIn("stride_culture_micro.proc_code", self._lab_panel)
        query.addWhereIn("pat_anon_id", random_patient_list)
        # Group to one row per (patient, order time, procedure, organism) so
        # the MAX(CASE ...) selects aggregate across antibiotic rows.
        query.addGroupBy('pat_anon_id')
        query.addGroupBy('shifted_order_time')
        query.addGroupBy('stride_culture_micro.proc_code')
        query.addGroupBy('organism_name')
        query.addOrderBy('pat_anon_id')
        query.addOrderBy('shifted_order_time')
        query.addOrderBy('stride_culture_micro.proc_code')
        query.addOrderBy('organism_name')
        query.setLimit(self._num_requested_episodes)

        # Delegate episode materialization to the parent class implementation,
        # keyed on patient ID and the order timestamp as the index time.
        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self,
            query,
            pat_id_col='pat_id',
            index_time_col='shifted_order_time')
from Const import RUNNER_VERBOSITY
from Util import log
from Util import DBTestCase

os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Build the psql script preamble.
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql medinfo jwang198")

# Query clinical_item rows for category 2, most frequent first.
DATA_QUERY = SQLQuery()

# Columns reported (from patient_item or clinical_item).
DATA_QUERY.addSelect("clinical_item_id")
DATA_QUERY.addSelect("description")

DATA_QUERY.addFrom("clinical_item")
DATA_QUERY.addWhereEqual("clinical_item_category_id", "2")
DATA_QUERY.addOrderBy("item_count", dir="DESC")

print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
# Example #7
os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql medinfo jwang198")

# Find all clinical_item_ids associated with each patient_id
# SELECT * FROM clinical_item WHERE clinical_item_category_id = 161;

DATA_QUERY = SQLQuery()

# From patient_item or clinical_item
DATA_QUERY.addSelect("patient_id")
DATA_QUERY.addSelect("clinical_item_category_id")
DATA_QUERY.addSelect("name")
DATA_QUERY.addSelect("description")

# Join
DATA_QUERY.addFrom("patient_item")
DATA_QUERY.addJoin(
    "clinical_item",
    "patient_item.clinical_item_id = clinical_item.clinical_item_id",
    joinType="INNER")
# Fix: express the two filters as two separate equality conditions instead of
# smuggling "clinical_item_category_id = 161 AND description" into the
# column-name argument, which relied on raw string concatenation inside
# addWhereEqual and bypassed proper parameter handling.
DATA_QUERY.addWhereEqual("clinical_item_category_id", 161)
DATA_QUERY.addWhereEqual("description", "Tt Med Univ (Primary)")  # Everyone
#DATA_QUERY.addWhereEqual("description", "Tt Pamf Med (Primary)") # Expert

DATA_QUERY.addOrderBy("patient_id", dir="ASC")
Exemple #8
0
    def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None):
        """Grade a batch of simulated physician-patient cases.

        Given the identifiers for a bunch of simulated physician-patient case
        records, and the identifier for a particular grading key to use,
        calculate what grade each case would get based on the choices made,
        and return a list of grade records (one dict per case), each merged
        with the most-active-user info for that case.

        sim_patient_ids: iterable of sim_patient_id values to grade.
        sim_grader_id: sim_grading_key.sim_grader_id identifying the key.
        conn: optional externally managed DB connection; when None, one is
            created here and closed before returning.
        """
        ext_conn = True
        if conn is None:
            conn = self.connFactory.connection()
            ext_conn = False
        try:
            # Inner query retrieves physician-patient cases with ranking group_names (to later select first)
            # per case for specified cases. Each NULL group_name is treated as a separate group by assigning it
            # sim_patient_order_id. It also omits Default user (sim_user_id = 0) from grading.
            inner_query = SQLQuery()
            inner_query.addSelect("score")
            inner_query.addSelect(
                "rank() over ("  # ranks rows incrementally in the same group
                "    partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id"
                "    order by sim_patient_order_id"
                ")")
            inner_query.addSelect("sim_user_id")
            inner_query.addSelect("sim_patient_id")
            inner_query.addSelect("sim_grader_id")
            inner_query.addFrom("sim_patient_order spo")
            inner_query.addJoin(
                "sim_grading_key sgk",
                "sgk.clinical_item_id = spo.clinical_item_id"
                "    and sgk.sim_state_id = spo.sim_state_id")
            inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id)
            inner_query.addWhereNotEqual(
                "spo.sim_user_id",
                0)  # 0 = ignore 'Default user', sets up initial cases
            inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids)
            inner_query.addOrderBy("relative_time_start")
            inner_query.addOrderBy("sim_patient_order_id")

            # Outer query sums the score per patient case and selects most graded physician for the case.
            # Theoretically, it isn't necessarily the most active physician for the case since his orders
            # might have been dropped by selecting only the first record within group_name group.
            query = SQLQuery()
            query.addSelect("sim_patient_id")
            query.addSelect("sim_grader_id")
            query.addSelect("sum(score) as total_score")
            query.addSelect(
                "mode() within group ("  # mode() selects most frequent value within group
                "    order by sim_user_id"
                ") as most_graded_user_id")
            query.addFrom("(" + str(inner_query) + ") as ranked_groups")
            query.addWhereEqual("ranked_groups.rank",
                                1)  # count only first order in the same group
            query.addGroupBy("sim_patient_id")
            query.addGroupBy("sim_grader_id")

            # Inner query params precede outer ones in the flattened SQL text.
            query_params = inner_query.getParams() + query.getParams()
            grades_table = DBUtil.execute(query,
                                          query_params,
                                          includeColumnNames=True,
                                          conn=conn)
            grades_model = modelListFromTable(grades_table)

            # get most active users for the cases
            most_active_user_query = SQLQuery()
            most_active_user_query.addSelect("sim_patient_id")
            most_active_user_query.addSelect("mode() within group ("
                                             "    order by sim_user_id"
                                             ") as most_active_user_id")
            most_active_user_query.addFrom("sim_patient_order")
            most_active_user_query.addWhereNotEqual("sim_user_id",
                                                    0)  # ignore Default user
            most_active_user_query.addWhereIn("sim_patient_id",
                                              sim_patient_ids)
            most_active_user_query.addGroupBy("sim_patient_id")
            most_active_user_query.addOrderBy("sim_patient_id")

            most_active_user_table = DBUtil.execute(most_active_user_query,
                                                    includeColumnNames=True,
                                                    conn=conn)
            most_active_user_model = modelListFromTable(most_active_user_table)
            # make a dict by sim_patient_id out of results - will be used for combining
            most_active_user_dict = {
                most_active_user["sim_patient_id"]: most_active_user
                for most_active_user in most_active_user_model
            }

            # Combine results.
            # Fix: dict.update() returns None, so the previous list
            # comprehension over grade.update(...) produced a list of Nones.
            # Update each grade record in place, then collect the merged dicts.
            complete_grades = []
            for grade in grades_model:
                grade.update(most_active_user_dict[grade["sim_patient_id"]])
                complete_grades.append(grade)

            return complete_grades

        finally:
            if not ext_conn:
                conn.close()
Exemple #9
0
    def querySourceItems(self,
                         startDate=None,
                         endDate=None,
                         progress=None,
                         conn=None):
        """Query the database for list of all source clinical items (lab results in this case)
        and yield the results one at a time.  If startDate provided, only return items
        whose result_time is on or after that date.
        Only include results records where the result_flag is set to an informative value,
        to focus only on abnormal lab results (including would be a ton more relatively uninformative
        data that would greatly expend data space and subsequent computation time)

        startDate: inclusive lower bound on sor.result_time, or None.
        endDate: exclusive upper bound on sor.result_time, or None.
        progress: optional progress tracker; its .total is set from a count
            estimate query before iteration begins.
        conn: optional externally managed DB connection; when None, one is
            created here and closed when the generator is exhausted.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Column headers to query for that map to respective fields in analysis table
        headers = [
            "sor.order_proc_id", "pat_id", "pat_enc_csn_id", "order_type",
            "proc_id", "proc_code", "base_name", "component_name",
            "common_name", "ord_num_value", "result_flag",
            "result_in_range_yn", "sor.result_time"
        ]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        # Join order_proc to the results table on order_proc_id.
        query.addFrom("stride_order_proc as sop")
        query.addFrom("%s as sor" % SOURCE_TABLE)
        query.addWhere("sop.order_proc_id = sor.order_proc_id")
        #query.addWhere("result_flag <> '*'");   # Will exclude nulls and the uninformative '*' values for text-based microbiology results
        if startDate is not None:
            query.addWhereOp("sor.result_time", ">=", startDate)
        if endDate is not None:
            query.addWhereOp("sor.result_time", "<", endDate)

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)
            # Normalize qualified labels so downstream code can use the
            # unqualified column names.
            rowModel["order_proc_id"] = rowModel["sor.order_proc_id"]
            rowModel["result_time"] = rowModel["sor.result_time"]

            # Skip rows with no base_name (nothing meaningful to key on).
            if rowModel['base_name'] is None:
                row = cursor.fetchone()
                continue

            # NOTE(review): presumably fills in/normalizes result_flag on the
            # row -- confirm against populateResultFlag's definition.
            self.populateResultFlag(rowModel, conn=conn)

            yield rowModel
            # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            row = cursor.fetchone()

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
Exemple #10
0
    def _query_patient_episodes(self):
        """Build and run the patient-episode query for microbiology cultures.

        Queries stride_culture_micro for one row per (patient, order time,
        proc code, organism), restricted to the configured lab panel and a
        random patient sample, with one 0/1 indicator column per commonly
        occurring organism (plus NO_BACTERIA for sterile cultures).
        Hands the query to FeatureMatrix._query_patient_episodes and stores
        the reported episode count on self._num_reported_episodes.
        """
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Query parameters: lab components in the panel, random patient sample.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        #print("RANDOM PATIENT LIST", random_patient_list)

        # result_flag background (2008-2014 data): flags include High, Low,
        # High Panic, Low Panic, Low Off-Scale, Negative, Positive, Resistant,
        # Susceptible, Abnormal, and *. Abnormal, Low Panic, and High Panic are
        # common and meaningful (include them); the remaining flags occur for
        # only a handful of results or uninformative microbiology tests
        # (ignore them).
        episode_query = SQLQuery()
        episode_query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id')
        episode_query.addSelect('CAST(shifted_order_time AS TIMESTAMP)')
        episode_query.addSelect('stride_culture_micro.proc_code')
        episode_query.addSelect('organism_name')  # one for the result

        # Indicator for sterile cultures, then one per top-10 common organism.
        episode_query.addSelect(
            "CASE WHEN organism_name IS NULL THEN 1 ELSE 0 END AS NO_BACTERIA")
        common_organisms = (
            'ESCHERICHIA COLI',
            'STAPHYLOCOCCUS AUREUS',
            'ENTEROCOCCUS SPECIES',
            'KLEBSIELLA PNEUMONIAE',
            'PSEUDOMONAS AERUGINOSA',
            'COAG NEGATIVE STAPHYLOCOCCUS',
            'ENTEROCOCCUS FAECALIS',
            'PROTEUS MIRABILIS',
            'CANDIDA ALBICANS',
        )
        for organism in common_organisms:
            column_alias = organism.replace(' ', '_')
            episode_query.addSelect(
                "CASE WHEN organism_name = '%s' THEN 1 ELSE 0 END AS %s" %
                (organism, column_alias))

        episode_query.addFrom('stride_culture_micro')

        episode_query.addWhereIn("stride_culture_micro.proc_code",
                                 [self._lab_panel])
        episode_query.addWhereIn("pat_id", random_patient_list)

        # Group and order by the same key columns so each episode is distinct
        # and output is deterministic.
        key_columns = ('pat_id', 'shifted_order_time',
                       'stride_culture_micro.proc_code', 'organism_name')
        for column in key_columns:
            episode_query.addGroupBy(column)
        for column in key_columns:
            episode_query.addOrderBy(column)

        episode_query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self,
            episode_query,
            pat_id_col='pat_id',
            index_time_col='shifted_order_time')
Exemple #11
0
class TestDBUtil(DBTestCase):
    """Test fixture for DBUtil: builds a TestTypes table plus in-memory
    script/data buffers and expected-row lists used by the test methods.

    NOTE(review): uses Python 2 syntax (`except Exception, err`); this module
    will not parse under Python 3 as written.
    """
    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self);

        # SQL script that creates and populates the TestTypes table.
        # Deliberately mixes comment styles (#, /* */, --) to exercise the
        # script parser's comment handling.
        self.SCRIPT_FILE = StringIO()
        self.SCRIPT_FILE.write("# Create table to test on.  Also testing that comment tag is recognized\n")
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("create table TestTypes\n")
        self.SCRIPT_FILE.write("(\n")
        self.SCRIPT_FILE.write("    TestTypes_id    serial,\n")
        self.SCRIPT_FILE.write("    MyText          varchar(50),    /* Same as character varying, also test standard SQL comment tags */\n")
        self.SCRIPT_FILE.write("    MyInteger       integer,\n")
        self.SCRIPT_FILE.write("    MyReal          real,   -- Floating point number, also test standard SQL comment tag\n")
        self.SCRIPT_FILE.write("    MyDateTime      TIMESTAMP,   -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n")
        self.SCRIPT_FILE.write("    MyYesNo         boolean\n")
        self.SCRIPT_FILE.write(");\n")
        self.SCRIPT_FILE.write("ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n");  # Should auto-create testtypes_testtypes_id_seq sequence
        self.SCRIPT_FILE.write("CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n")
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n")
        self.SCRIPT_FILE.write("values ('Sample Text',  123,123.45,'2004-09-08 19:41:47.292000',True);\n")
        self.SCRIPT_FILE.write("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n")
        self.SCRIPT_FILE.write("values ('Joe Mama',     234,23.45,'1990-10-03 19:41:47.292000',False);\n")
        self.SCRIPT_FILE.write("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n")
        self.SCRIPT_FILE.write("values ('Mo Fo',        345,3.45,'2014-01-04 19:41:47.292000',True);\n")
        self.SCRIPT_FILE.write("\n")
        # Re-wrap the buffer so consumers can read the script from the start.
        self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

        # Tab-delimited column header line for bulk-load tests.
        self.DATA_TABLE= "TestTypes";
        self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n";

        # Tab-delimited data rows; FALSE_STR/TRUE_STR come from module scope.
        self.DATA_FILE = StringIO()
        self.DATA_FILE.write('100\t100.1\tNone\tATest\n');
        self.DATA_FILE.write('200\t200.2\t'+FALSE_STR+'\tNone\n');
        self.DATA_FILE.write('200\t200.2\t'+FALSE_STR+'\t\n');  # Skip None tag at end of line, test that white space won't get lost
        self.DATA_FILE.write('300\t\t'+TRUE_STR+'\tCTest\n');
        self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

        # Expected parsed rows corresponding to DATA_FILE contents.
        self.DATA_ROWS = [];
        self.DATA_ROWS.append([100,100.1,None,  "ATest",]);
        self.DATA_ROWS.append([200,200.2,False, None,]);
        self.DATA_ROWS.append([300,None,True,  "CTest",]);

        # Variant data file exercising quoted fields and embedded quotes.
        self.MULTI_LINE_DATA_FILE = StringIO()
        self.MULTI_LINE_DATA_FILE.write('myinteger\t"MyReal"\t"MyYesNo"\tMyText\n');
        self.MULTI_LINE_DATA_FILE.write('100\t100.1\tNone\t"""A"" Test and ""more"""\n');
        self.MULTI_LINE_DATA_FILE.write('200\t200.2\t'+FALSE_STR+'\t""\n');
        self.MULTI_LINE_DATA_FILE.write('300\tNone\t'+TRUE_STR+'\t"C\\nTest"\t\n');
        self.MULTI_LINE_DATA_FILE = StringIO(self.MULTI_LINE_DATA_FILE.getvalue())

        # Expected parsed rows for the quoted/multi-line variant.
        self.MULTI_LINE_DATA_ROWS = [];
        self.MULTI_LINE_DATA_ROWS.append([100,100.1,None,  '"A" Test and "more"',]);
        self.MULTI_LINE_DATA_ROWS.append([200,200.2,False, None,]);
        self.MULTI_LINE_DATA_ROWS.append([300,None,True,  'C\\nTest',]);

        # ID summary data to make it easier to verify stuff
        self.COL_NAMES = self.DATA_COLS.split();
        self.ID_COL = self.COL_NAMES[0];
        self.ID_DATA = [];
        for row in self.DATA_ROWS:
            self.ID_DATA.append(row[0]);

        # Build query to get update rows
        self.DATA_QUERY = SQLQuery();
        for col in self.COL_NAMES:
            self.DATA_QUERY.addSelect(col);
        self.DATA_QUERY.addFrom(self.DATA_TABLE);
        self.DATA_QUERY.addWhereIn(self.ID_COL,self.ID_DATA);
        self.DATA_QUERY.addOrderBy(self.ID_COL);



    def tearDown(self):
        """Restore state from any setUp or test steps"""
        try:
            # Best-effort cleanup; table may not exist if a test failed early.
            DBUtil.execute("drop table TestTypes")
            pass
        except Exception, err:
            log.warning(err)
            pass

        DBTestCase.tearDown(self);
Exemple #12
0
    def _query_patient_episodes(self):
        """Query per-order patient lab episodes and report them via FeatureMatrix.

        For the STRIDE data source, builds an SQLQuery over
        stride_order_proc/stride_order_results; otherwise builds a raw SQL
        string over the `labs` table (to dodge sqlite3's bound-parameter
        limit). Either way the result is handed to
        FeatureMatrix._query_patient_episodes and the reported count is
        stored on self._num_reported_episodes.
        """
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.

        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':

            query = SQLQuery()
            '''
            pat_id: hashed patient id
            '''
            query.addSelect('CAST(pat_id AS BIGINT) as pat_id')
            '''
            order_proc_id: unique identifier for an episode
            '''
            query.addSelect('sop.order_proc_id')
            '''
            self._varTypeInTable: usually proc_code or base_name, the column of the lab to be queried
            '''
            query.addSelect(self._varTypeInTable)
            '''
            order_time: The time of the order. Note that sor table does not have this info. 
            '''
            query.addSelect('order_time')
            '''
            y-labels related columns, choose one to predict (for now, use all_components_normal to predict). 
            '''
            if self._isLabPanel:
                query.addSelect(
                    "CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel"
                )  #
                query.addSelect(
                    "SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components"
                )  # sx
                query.addSelect(
                    "SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components"
                )  # sx
                query.addSelect(
                    "CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal"
                )  # sx
            else:
                query.addSelect(
                    "CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal"
                )
            '''
            Relevant tables. Note that sor table does not have patient_id info; need to join sop to obtain it.  
            '''
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            '''
            Condition: self._time_limit[0] < order_time < self._time_limit[1]
            '''
            # NOTE(review): time bounds are interpolated into the SQL string
            # rather than bound as parameters -- safe only if the limits are
            # trusted datetime values.
            if self._time_limit:
                if self._time_limit[0]:
                    query.addWhere("sop.order_time > '%s'" %
                                   self._time_limit[0])
                if self._time_limit[1]:
                    query.addWhere("sop.order_time < '%s'" %
                                   self._time_limit[1])

            query.addWhere(
                "(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)"
            )
            query.addWhereIn(self._varTypeInTable, [self._lab_var])  # sx
            query.addWhereIn("pat_id", random_patient_list)

            query.addGroupBy('pat_id')
            query.addGroupBy('sop.order_proc_id')
            query.addGroupBy(self._varTypeInTable)
            query.addGroupBy('order_time')
            if not self._isLabPanel:
                query.addGroupBy('result_flag')
            query.addGroupBy('abnormal_yn')  #

            query.addOrderBy('pat_id')
            query.addOrderBy('sop.order_proc_id')
            query.addOrderBy(self._varTypeInTable)
            query.addOrderBy('order_time')

            query.setLimit(self._num_requested_episodes)

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query, index_time_col='order_time')

        else:
            '''
            Sqlite3 has an interesting limit for the total number of place_holders in a query, 
            and this limit varies across platforms/operating systems (500-99999 on mac, 999 by defaulty). 
            
            To avoid this problem when querying 1000-10000 patient ids, use string queries instead of the
            default (convenient) routine in DBUtil.  
            '''

            # NOTE(review): string-built SQL; acceptable only because the
            # interpolated values come from internal config and the DB itself.
            query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable
            if not self._isLabPanel:
                query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
            else:
                query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal "
            query_str += "FROM labs "
            query_str += "WHERE %s = '%s' " % (self._varTypeInTable,
                                               self._lab_var)
            query_str += "AND pat_id IN "
            pat_list_str = "("
            for pat_id in random_patient_list:
                pat_list_str += str(pat_id) + ","
            pat_list_str = pat_list_str[:-1] + ") "
            query_str += pat_list_str
            query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "LIMIT %d" % self._num_requested_episodes

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query_str, index_time_col='order_time')
Exemple #13
0
    def _get_random_patient_list(self):
        """Return a random sample of patient ids that have orders for the lab.

        Counts orders per patient for the configured lab (panel or component),
        estimates how many patients are needed to reach the requested number
        of episodes (median orders per patient), then samples that many
        patient ids at random. The UMich branch builds its own SQL string and
        returns early; the STRIDE/default branches share the tail of the
        method.
        """
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        query = SQLQuery()
        query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')

        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':
            if self._isLabPanel:
                query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
                query.addFrom('stride_order_proc AS sop')
                query.addFrom('stride_order_results AS sor')

                # Optional time window on order_time (string-interpolated).
                if self._time_limit:
                    if self._time_limit[0]:
                        query.addWhere("sop.order_time > '%s'" %
                                       self._time_limit[0])
                    if self._time_limit[1]:
                        query.addWhere("sop.order_time < '%s'" %
                                       self._time_limit[1])

                query.addWhere('sop.order_proc_id = sor.order_proc_id')
                query.addWhereIn('proc_code', [self._lab_var])
                '''
                sbala: Technically it's possible for someone to get a lab ordered without getting results
                '''
                query.addWhereIn("base_name", self._lab_components)

            else:
                query.addSelect('COUNT(sor.order_proc_id) AS num_orders')
                query.addFrom('stride_order_proc AS sop')
                query.addFrom('stride_order_results AS sor')
                query.addWhere('sop.order_proc_id = sor.order_proc_id')
                ##
                query.addWhereIn("base_name", [self._lab_var])
        elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
            # UMich path: raw SQL string (see sqlite3 placeholder-limit note
            # in _query_patient_episodes); returns from inside this branch.
            query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id , "
            query_str += "COUNT(order_proc_id) AS num_orders "
            query_str += "FROM labs "
            # query_str += " WHERE %s IN (%s) "%(self._varTypeInTable, self._lab_var)
            query_str += "WHERE %s = '%s' " % (self._varTypeInTable,
                                               self._lab_var)
            # NOTE(review): this branch reads self.notUsePatIds while the
            # shared tail below reads self._notUsePatIds -- confirm which
            # attribute the class actually sets; one of these may be stale.
            if self.notUsePatIds:
                query_str += "AND pat_id NOT IN ("
                for pat_id in self.notUsePatIds:
                    query_str += "%s," % pat_id
                query_str = query_str[:-1] + ") "  # get rid of comma
            query_str += "GROUP BY pat_id"

            log.debug('Querying median orders per patient...')

            # TODO: best way to integrate UMich code
            results = DBUtil.execute(query_str)
            order_counts = [row[1] for row in results]

            if len(results) == 0:
                error_msg = '0 orders for order "%s."' % self._lab_var  # sx
                log.critical(error_msg)
                raise Exception(error_msg)
                # sys.exit('[ERROR] %s' % error_msg) # sxu: sys.exit cannot be caught by Exception
            else:
                avg_orders_per_patient = numpy.median(order_counts)
                log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
                # Based on average # of results, figure out how many patients we'd
                # need to get for a feature matrix of requested size.
                self._num_patients = int(numpy.max([self._num_requested_episodes / \
                                                    avg_orders_per_patient, 1]))
                # Some components may have fewer associated patients than the required sample size
                patient_number_chosen = min([len(results),
                                             self._num_patients])  #
                inds_random_patients = numpy.random.choice(
                    len(results), size=patient_number_chosen, replace=False)
                # print 'inds_random_patients:', inds_random_patients
                pat_IDs_random_patients = []
                for ind in inds_random_patients:
                    pat_IDs_random_patients.append(results[ind][0])
                return pat_IDs_random_patients

        else:
            query.addSelect('COUNT(order_proc_id) AS num_orders')
            query.addFrom('labs')
            if self._isLabPanel:
                query.addWhereIn("proc_code", [self._lab_var])
                query.addWhereIn("base_name", self._lab_components)
            else:
                query.addWhereIn("base_name", [self._lab_var])
        '''
        Fo hold-out set, do not use the patients already used in training/validation. 
        '''
        if self._notUsePatIds:
            query.addWhereNotIn('pat_id', self._notUsePatIds)

        query.addGroupBy('pat_id')

        log.debug('Querying the number of orders per patient...')

        results = DBUtil.execute(query)

        # Each row is (pat_id, num_orders).
        order_counts = [row[1] for row in results]

        if len(results) == 0:
            error_msg = '0 orders for component "%s."' % self._lab_var  # sx
            log.critical(error_msg)
            raise Exception(error_msg)
            # sys.exit('[ERROR] %s' % error_msg) # sxu: sys.exit cannot be caught by Exception
        else:
            avg_orders_per_patient = numpy.median(order_counts)
            log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
            # Based on average # of results, figure out how many patients we'd
            # need to get for a feature matrix of requested size.
            self._num_patients = int(numpy.max([self._num_requested_episodes / \
                                                avg_orders_per_patient, 1]))

            # Some components may have fewer associated patients than the required sample size
            patient_number_chosen = min([len(results), self._num_patients])  #
            '''
            Set seed to ensure re-producibility of patient episodes.
            Recover int random_state here, since numpy requires int while sql requires [-1,1]
            '''
            # NOTE(review): sys.maxint is Python 2 only; this line breaks on
            # Python 3 (use sys.maxsize there).
            numpy.random.seed(int(self._random_state * float(sys.maxint)))
            inds_random_patients = numpy.random.choice(
                len(results), size=patient_number_chosen, replace=False)

            pat_IDs_random_patients = [
                results[ind][0] for ind in inds_random_patients
            ]

            return pat_IDs_random_patients
Exemple #14
0
    line = line.strip().split(",")
    if (line[0] not in patient_encounters):
        patient_encounters[line[0]] = [
            pandas.to_datetime(line[1]),
        ]  # patient id: list of admission times
    else:
        patient_encounters[line[0]].append(pandas.to_datetime(line[1]))

# print(patient_encounters)

# Create SCRIPT_FILE
# Script fragment: pull patient death dates, then open CSV outputs keyed to
# 7/30/90-day post-admission windows.
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")
DATA_QUERY = SQLQuery()

DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("death_date")
DATA_QUERY.addFrom("stride_patient")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
#print(len(results))

output_7 = open("/Users/jwang/Desktop/Results/7_day_deaths.csv", "w")
output_30 = open("/Users/jwang/Desktop/Results/30_day_deaths.csv", "w")
output_90 = open("/Users/jwang/Desktop/Results/90_day_deaths.csv", "w")

# CSV headers for the 7- and 30-day outputs.
# NOTE(review): no header is written to output_90 in this visible fragment --
# presumably the matching write was lost when this snippet was truncated.
output_7.write("patient_id,prev_admission_time,death_time,delta\n")
output_30.write("patient_id,prev_admission_time,death_time,delta\n")
    def orderSetSearch(self, itemQuery, conn=None):
        """Look for clinical items based on specified query criteria,
        aggregated by the order set (item_collection) they belong to.

        itemQuery: object with optional searchStr (space-separated prefix words
            matched case-insensitively against collection names) and optional
            analysisStatus filter on the clinical items.
        conn: optional externally managed DB connection (left open if given).

        Generator yielding one dict per order set:
            {"external_id", "name", "itemList"} where itemList holds the
            item row models in query order.  Ad-hoc sections are excluded.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("ic.item_collection_id")
            query.addSelect("ic.external_id")
            query.addSelect("ic.name as collection_name")
            query.addSelect("ic.section")
            query.addSelect("ic.subgroup")
            query.addSelect("ci.clinical_item_category_id")
            query.addSelect("ci.clinical_item_id")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addFrom("item_collection as ic")
            query.addFrom("item_collection_item as ici")
            query.addFrom("clinical_item as ci")
            query.addWhere("ic.item_collection_id = ici.item_collection_id")
            query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
            query.addWhereNotEqual("ic.section", AD_HOC_SECTION)
            if itemQuery.searchStr is not None:
                searchWords = itemQuery.searchStr.split()
                for searchWord in searchWords:
                    # Prefix search by regular expression: word at start of the
                    # name, or preceded by a non-letter (word boundary)
                    query.addWhereOp(
                        "ic.name", "~*",
                        "^%(searchWord)s|[^a-z]%(searchWord)s" %
                        {"searchWord": searchWord})
            if itemQuery.analysisStatus is not None:
                query.addWhereEqual("ci.analysis_status",
                                    itemQuery.analysisStatus)
            # Order so that rows of the same collection arrive contiguously,
            # which the aggregation loop below depends on
            query.addOrderBy("lower(ic.name)")
            query.addOrderBy("ic.external_id")
            query.addOrderBy("lower(ic.section)")
            query.addOrderBy("lower(ic.subgroup)")
            query.addOrderBy("ci.clinical_item_id")
            query.addOrderBy("ci.name")
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)

            # Aggregate up into order sets
            orderSetModel = None
            for row in dataModels:
                if orderSetModel is None or row[
                        "external_id"] != orderSetModel["external_id"]:
                    if orderSetModel is not None:
                        # Prior order set exists, yield/return it before preparing next one
                        yield orderSetModel
                    orderSetModel = \
                        {   "external_id": row["external_id"],
                            "name": row["collection_name"],
                            "itemList": list(),
                        }
                orderSetModel["itemList"].append(row)
            if orderSetModel is not None:
                # Yield the last processed model.  The guard fixes a prior bug
                # where an empty result set yielded a spurious None.
                yield orderSetModel
        finally:
            if not extConn:
                conn.close()
    def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
        """Query the database for list of all source clinical items (diagnosed problems in this case)
        and yield the results one at a time.  If startDate provided, only return items
        whose noted_date is on or after that date.

        startDate/endDate: optional half-open [startDate, endDate) filter on noted_date.
        progress: optional progress tracker; only touched when provided.
        conn: optional externally managed DB connection (left open if given).

        Generator yielding RowItemModels one at a time so the full result set
        never has to be held in memory.  For each ICD code, parent category
        codes are also yielded so component diagnoses aggregate upward.
        """
        extConn = conn is not None;
        if not extConn:
            conn = self.connFactory.connection();

        # Facilitate rapid lookup of ICD9/ICD10 codes
        if self.icd9_str_by_code is None:
            self.prepare_icd9_lookup(conn=conn)
        if self.icd10_str_by_code is None:
            self.prepare_icd10_lookup(conn=conn)

        # Column headers to query for that map to respective fields in analysis table
        headers = ["pat_id","pat_enc_csn_id","noted_date","resolved_date","dx_icd9_code","dx_icd9_code_list","dx_icd10_code_list","data_source"];

        query = SQLQuery();
        for header in headers:
            query.addSelect( header );
        query.addFrom("stride_dx_list as dx");
        query.addWhere("noted_date is not null");   # Only work with elements that have dates assigned for now
        if startDate is not None:
            query.addWhereOp("noted_date",">=", startDate);
        if endDate is not None:
            query.addWhereOp("noted_date","<", endDate);

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

        cursor = conn.cursor();
        # Do one massive query, but yield data for one item at a time.
        cursor.execute( str(query), tuple(query.params) );

        row = cursor.fetchone();
        while row is not None:
            row_model = RowItemModel( row, headers );

            # 2014-2017 data does not have dx_icd9_code. Instead, has
            # both dx_icd9_code_list and dx_icd10_code_list. For these items,
            # there is a one:many mapping of source item to converted item.
            # Collect icd10 codes.
            icd10_codes = set()
            if row_model['dx_icd10_code_list'] not in ['', None]:
                codes = row_model['dx_icd10_code_list'].split(',')
                for code in codes:
                    icd10_codes.add(code)
            # Collect icd9 codes.  Prefer the single dx_icd9_code column;
            # fall back to the comma-separated list only when it is absent.
            icd9_codes = set()
            if row_model['dx_icd9_code'] not in ['', None]:
                icd9_codes.add(row_model['dx_icd9_code'])
            else:
                if row_model['dx_icd9_code_list'] not in ['', None]:
                    codes = row_model['dx_icd9_code_list'].split(',')
                    for code in codes:
                        icd9_codes.add(code)

            # If there are no ICD codes, skip to next row.
            if len(icd9_codes) == 0 and len(icd10_codes) == 0:
                row = cursor.fetchone()
                continue

            # Process ICD codes.
            # Build a temporary dictionary so that a single loop can take care
            # of both ICD9 and ICD10 without mixing the data.
            icd_versions = {
                'ICD9': {
                    'codes': icd9_codes,
                    'lookup': self.icd9_str_by_code
                },
                'ICD10': {
                    'codes': icd10_codes,
                    'lookup': self.icd10_str_by_code
                }
            }

            for version, info in icd_versions.iteritems():
                icd_codes = info['codes']
                icd_lookup = info['lookup']

                for icd_code in icd_codes:
                    # Look up string. Otherwise default to ICD code.
                    row_model['icd_str'] = icd_code
                    if icd_code in icd_lookup:
                        row_model['icd_str'] = icd_lookup[icd_code]
                    row_model['dx_icd_code'] = version + '.' + icd_code

                    # Yield one row worth of data at a time to avoid having to keep
                    # the whole result set in memory.
                    yield row_model

                    orig_code = icd_code
                    if SUBCODE_DELIM in orig_code:
                        # Insert copies of item for parent node codes to aggregate
                        # component diagnoses into general categories.
                        while icd_code[-1] != SUBCODE_DELIM:
                            icd_code = icd_code[:-1] # Truncate trailing digit
                            if icd_code in icd_lookup:
                                # Found a matching parent code, so yield this
                                # version.
                                row_model['icd_str'] = icd_lookup[icd_code]
                                row_model['dx_icd_code'] = version + '.' + icd_code
                                yield row_model

                        # One more cycle to get parent node with no subcode
                        # delimiter at all.
                        icd_code = icd_code[:-1] # Truncate off SUBCODE_DELIM
                        if icd_code in icd_lookup:
                            row_model['icd_str'] = icd_lookup[icd_code]
                            row_model['dx_icd_code'] = version + '.' + icd_code

                            yield row_model

            row = cursor.fetchone();
            if progress is not None:
                # Guard fixes prior bug: Update() was called unconditionally
                # even though progress defaults to None (AttributeError)
                progress.Update();

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close();

        if not extConn:
            conn.close();
				SELECT
					pat_id,
					medication_id,
					description,
					COUNT(*) as medication_co_occurrence
				FROM
					stride_order_med
				WHERE
					pat_id = {0} AND
					ordering_date > {1}-Xmin AND
					ordering_date < {1}+Xmin
				GROUP BY
					pat_id, medication_id, description
				"""
				DATA_QUERY = SQLQuery();
				DATA_QUERY.addSelect("pat_id")
				DATA_QUERY.addSelect("medication_id")
				DATA_QUERY.addSelect("description")
				DATA_QUERY.addSelect("COUNT(*) as medication_co_occurrence")

				DATA_QUERY.addFrom("stride_order_med")
				DATA_QUERY.addWhere("pat_id = '{0}'".format(pat_id))
				DATA_QUERY.addWhere("ordering_date >= '{0}'".format(window_lower))
				DATA_QUERY.addWhere("ordering_date <= '{0}'".format(window_upper))
				if (pat_id in orderset_order_med_ids.keys()):
					DATA_QUERY.addWhere("order_med_id NOT IN ({0})".format(order_med_id_str)) # do not count order instances ordered with an order set; we only want a la carte instances

				DATA_QUERY.addGroupBy("pat_id")
				DATA_QUERY.addGroupBy("medication_id")
				DATA_QUERY.addGroupBy("description")
Exemple #18
0
    def querySourceItems(self, convOptions, progress=None, conn=None):
        """Query the database for list of all source clinical items (culture results, etc.)
        and yield the results one at a time.  If startDate provided, only return items whose
        occurence date is on or after that date.

        Rows without a shifted_result_time are skipped.  Positive cultures with
        uninterpretable results (missing susceptibility, or antibiotic missing
        or 'Method') are skipped.  Susceptibility values are collapsed to
        'Susceptible' vs 'Not_Susceptible'.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Column headers to query for that map to respective fields in analysis table
        headers = [
            "order_proc_anon_id", "pat_anon_id", "pat_enc_csn_anon_id",
            "proc_code", "organism_name", "antibiotic_name", "suseptibility",
            "shifted_result_time"
        ]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        query.addFrom("stride_culture_micro")
        # TODO: FIGURE OUT WHY CAN"T DO >= OPERATION HERE
        # if convOptions.startDate is not None:
        #     query.addWhereOp("shifted_result_time",">=", convOptions.startDate);
        # if convOptions.endDate is not None:
        #     query.addWhereOp("shifted_result_time","<", convOptions.endDate);  # Still use begin date as common filter value

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)

            if rowModel[
                    'shifted_result_time'] is None:  # Don't add if no result time given
                row = cursor.fetchone()
                continue

            if rowModel[
                    'organism_name'] is not None:  # if positive culture but results uninterpretable, don't add feature
                if rowModel['suseptibility'] is None or rowModel[
                        'antibiotic_name'] == 'Method' or rowModel[
                            'antibiotic_name'] is None:
                    row = cursor.fetchone()
                    continue

            # Explicit None check replaces the previous bare "except: pass",
            # which silently swallowed ALL errors whenever antibiotic_name was
            # None.  Behavior preserved: when antibiotic_name is None both the
            # rename and the susceptibility collapse are skipped.
            if rowModel['antibiotic_name'] is not None:
                # So that we don't run into directory issues later when writing temp files
                rowModel['antibiotic_name'] = rowModel[
                    'antibiotic_name'].replace('/', '-')

                # Change susceptibility features to either susc or not
                if rowModel['suseptibility'] is not None:
                    if rowModel['suseptibility'] in ('Susceptible', 'Positive'):
                        rowModel['suseptibility'] = 'Susceptible'
                    else:
                        rowModel['suseptibility'] = "Not_Susceptible"

            yield rowModel
            # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            row = cursor.fetchone()

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
Exemple #19
0
    def _query_patient_episodes(self):
        """Build and run the episode-level SQL query for the requested lab panel.

        Joins stride_order_proc to stride_order_results for the panel's orders
        over the randomly sampled patient list, aggregating component result
        flags into per-episode counts, then delegates execution/reporting to
        FeatureMatrix._query_patient_episodes.
        """
        log.info('Querying patient episodes...')
        # NOTE: removed an unused DB cursor that was opened here and never
        # closed (resource leak); the delegate below manages its own access.

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.

        # Note that some episodes have ord_num_value = 9999999
        # These episodes are removed

        query = SQLQuery()
        query.addSelect('CAST(pat_id AS BIGINT)')
        query.addSelect('sop.order_proc_id AS order_proc_id')
        query.addSelect('proc_code')
        query.addSelect('order_time')
        query.addSelect('ord_num_value')
        query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel")
        query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components")
        query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components")
        query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal")
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
        query.addWhere("ord_num_value != 9999999")
        query.addWhereIn("proc_code", [self._lab_panel])
        query.addWhereIn("pat_id", random_patient_list)
        query.addGroupBy('pat_id')
        query.addGroupBy('sop.order_proc_id')
        query.addGroupBy('proc_code')
        query.addGroupBy('order_time')
        query.addGroupBy('abnormal_yn')
        query.addGroupBy('ord_num_value')
        query.addOrderBy('pat_id')
        query.addOrderBy('sop.order_proc_id')
        query.addOrderBy('proc_code')
        query.addOrderBy('order_time')
        query.addOrderBy('ord_num_value')
        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query, index_time_col='order_time')
Exemple #20
0
os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery
from medinfo.db.DBUtil import NUMBER, BOOLEAN, STRING, DATETIME

# Create SCRIPT_FILE

# clinical_item_category_id = 13 (Discharge)
# clinical_item_category_id = 23 (Admission)

# Pull all admission/discharge ADT events from stride_adt
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")
DATA_QUERY = SQLQuery()

DATA_QUERY.addSelect("pat_anon_id")
DATA_QUERY.addSelect("shifted_transf_in_dt_tm")
DATA_QUERY.addSelect("department_in")
DATA_QUERY.addSelect("event_in")
DATA_QUERY.addSelect("shifted_transf_out_dt_tm")
DATA_QUERY.addSelect("event_out")
DATA_QUERY.addSelect("pat_enc_csn_anon_id")
# Fix: state the compound condition directly with addWhere instead of the
# previous addWhereEqual("event_out = 'Discharge' OR event_in", 'Admission'),
# which smuggled an OR fragment through the column-name argument.  The
# resulting SQL predicate is identical.
DATA_QUERY.addWhere("(event_out = 'Discharge' OR event_in = 'Admission')")

DATA_QUERY.addFrom("stride_adt")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
Exemple #21
0
    def queryPatientItemsPerPatient(self, analysisOptions, progress=None, conn=None):
        """Query the database for an ordered list of patient clinical items,
        in the order in which they occurred.
        This could be a large amount of data, so option to provide
        list of specific patientIds or date ranges to query for.  In either case,
        results will be returned as an iterator over individual lists
        for each patient.  Lists will contain RowItemModels, each with data:
            * patient_item_id
            * patient_id
            * encounter_id
            * clinical_item_id
            * item_date
            * analyze_date
        """
        extConn = conn is not None;
        if not extConn:
            conn = self.connFactory.connection();

        # Reset for actual data selects
        query= SQLQuery();
        query.addSelect("pi.patient_item_id");
        query.addSelect("pi.patient_id");
        query.addSelect("pi.encounter_id");
        query.addSelect("pi.clinical_item_id");
        query.addSelect("pi.item_date");
        query.addSelect("pi.analyze_date");
        query.addFrom("patient_item as pi");
        query.addFrom("clinical_item as ci");
        query.addWhere("pi.clinical_item_id = ci.clinical_item_id");
        query.addWhere("ci.analysis_status <> 0");  # Skip steps designated to be ignored
        if analysisOptions.patientIds is not None:
            query.addWhereIn("patient_id", analysisOptions.patientIds );
        if analysisOptions.startDate is not None:
            query.addWhereOp("pi.item_date",">=", analysisOptions.startDate);
        if analysisOptions.endDate is not None:
            # Half-open range: startDate inclusive, endDate exclusive
            query.addWhereOp("pi.item_date","<", analysisOptions.endDate);
        # Ordering by patient_id first is what makes the per-patient grouping
        # in the loop below valid (all of a patient's rows arrive contiguously)
        query.addOrderBy("pi.patient_id");
        query.addOrderBy("pi.item_date");
        query.addOrderBy("pi.clinical_item_id");

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

        cursor = conn.cursor();

        # Do one massive query, but yield data for one patient at a time.
        # This should minimize the number of DB queries and the amount of
        #   data that must be kept in memory at any one time.
        cursor.execute( str(query), tuple(query.params) );

        currentPatientId = None;
        currentPatientData = list();

        headers = ["patient_item_id","patient_id","encounter_id","clinical_item_id","item_date","analyze_date"];

        row = cursor.fetchone();
        while row is not None:
            (patient_item_id, patientId, encounter_id, clinicalItemId, itemDate, analyzeDate) = row;
            if currentPatientId is None:
                currentPatientId = patientId;

            if patientId != currentPatientId:
                # Changed user, yield the existing data for the previous user
                yield currentPatientData;
                # Update our data tracking for the current user
                currentPatientId = patientId;
                currentPatientData = list();

            rowModel = RowItemModel( row, headers );
            currentPatientData.append( rowModel );

            row = cursor.fetchone();

        # Yield the final user's data
        # NOTE(review): if the query returns no rows at all, this still yields
        #   one empty list — confirm callers tolerate that.
        yield currentPatientData;

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close();

        if not extConn:
            conn.close();
    def querySourceItems(self,
                         rxcuiDataByMedId,
                         convOptions,
                         progress=None,
                         conn=None):
        """Query the database for list of all source clinical items (medications, etc.)
        and yield the results one at a time.  If startDate provided, only return items whose
        ordering_date is on or after that date.

        Generator: streams normalized medication models one at a time so the
        full result set never has to sit in memory.  Template medications and
        PRN orders are excluded.  An externally supplied conn is left open.
        """
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()

        # Source columns to select, and the analysis-table field names each
        # maps to (parallel lists, same order)
        queryHeaders = [
            "med.order_med_id", "pat_id", "pat_enc_csn_id",
            "med.medication_id", "med.description", "ordering_date",
            "med_route", "number_of_doses", "protocol_id", "protocol_name",
            "section_name", "smart_group"
        ]
        headers = [
            "order_med_id", "pat_id", "pat_enc_csn_id", "medication_id",
            "description", "ordering_date", "med_route", "number_of_doses",
            "protocol_id", "protocol_name", "section_name", "smart_group"
        ]

        query = SQLQuery()
        for columnName in queryHeaders:
            query.addSelect(columnName)
        # Left outer join picks up order set links when they exist
        query.addFrom(
            "stride_order_med as med left outer join stride_orderset_order_med as os on med.order_med_id = os.order_med_id"
        )
        query.addWhere("med.medication_id <> %s" % TEMPLATE_MEDICATION_ID)
        # Ignore PRN orders
        query.addWhere("freq_name not like '%%PRN'")
        if convOptions.startDate is not None:
            query.addWhereOp("ordering_date", ">=", convOptions.startDate)
        if convOptions.endDate is not None:
            query.addWhereOp("ordering_date", "<", convOptions.endDate)

        # Give the caller's progress tracker an estimate of total work
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # One big query; rows are consumed and yielded incrementally below
        cursor.execute(str(query), tuple(query.params))

        # iter(callable, sentinel) pulls rows until fetchone() returns None
        for dbRow in iter(cursor.fetchone, None):
            sourceModel = RowItemModel(dbRow, headers)
            # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            for normalizedModel in self.normalizeMedData(
                    rxcuiDataByMedId, sourceModel, convOptions):
                yield normalizedModel

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if ownConnection:
            conn.close()
Exemple #23
0
    def querySourceItems(self, patientIds=None, progress=None, conn=None):
        """Query the database for list of all patient demographics
        and yield the results one at a time.  If patientIds provided, only return items
        matching those IDs.

        For each patient row, yields several RowItemModels derived from it:
        birth year, birth decade, race/ethnicity summary, gender, and (when
        present) death date.  Rows without a birth_year are logged and skipped.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Column headers to query for that map to respective fields in analysis table
        headers = [
            "pat_id", "birth_year", "gender", "death_date", "race", "ethnicity"
        ]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        query.addFrom("stride_patient as sp")
        if patientIds is not None:
            query.addWhereIn("sp.pat_id", patientIds)

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)

            if rowModel["birth_year"] is None:
                # Blank values, doesn't make sense.  Skip it
                log.warning(rowModel)
            else:
                # Record birth at resolution of year
                rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
                rowModel["name"] = "Birth"
                rowModel["description"] = "Birth Year"
                yield rowModel

                # Record another at resolution of decade
                # (// keeps floor-division semantics stable across Py2/Py3)
                decade = (rowModel["birth_year"] // 10) * 10
                rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
                rowModel["name"] = "Birth%ds" % decade
                rowModel["description"] = "Birth Decade %ds" % decade
                yield rowModel

                # Summarize race and ethnicity information into single field of interest
                raceEthnicity = self.summarizeRaceEthnicity(rowModel)
                rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
                # Strip off punctuation (Py2 str.translate deletion form)
                rowModel["name"] = "Race" + (raceEthnicity.translate(
                    None, " ()-/"))
                rowModel["description"] = "Race/Ethnicity: %s" % raceEthnicity
                yield rowModel

                gender = rowModel["gender"].title()
                rowModel["name"] = gender
                rowModel["description"] = "%s Gender" % gender
                yield rowModel

                if rowModel["death_date"] is not None:
                    rowModel["name"] = "Death"
                    rowModel["description"] = "Death Date"
                    rowModel["itemDate"] = rowModel["death_date"]
                    yield rowModel

            row = cursor.fetchone()
            if progress is not None:
                # Guard fixes prior bug: Update() was called unconditionally
                # even though progress defaults to None (AttributeError)
                progress.Update()

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
    def queryMixSourceItems(self,
                            rxcuiDataByMedId,
                            convOptions,
                            progress=None,
                            conn=None):
        """Query the database for list of source clinical items (medications from mixes, etc.)
        and yield the results one at a time.

        Rows are ordered by (ordering_date, order_med_id, mix line) so that all
        components of one medication mix arrive contiguously; they are
        accumulated per order_med_id and flushed through normalizeMixData when
        the next mix begins, and once more after the loop for the final mix.
        An externally supplied conn is left open for the caller to manage.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Column headers to query for that map to respective fields in analysis table
        queryHeaders = [
            "med.order_med_id", "med.pat_id", "med.pat_enc_csn_id",
            "mix.medication_id", "mix.medication_name", "mix.ingredient_type",
            "med.ordering_date", "med.med_route", "med.number_of_doses",
            "protocol_id", "protocol_name", "section_name", "smart_group"
        ]
        headers = [
            "order_med_id", "pat_id", "pat_enc_csn_id", "medication_id",
            "description", "ingredient_type", "ordering_date", "med_route",
            "number_of_doses", "protocol_id", "protocol_name", "section_name",
            "smart_group"
        ]

        query = SQLQuery()
        for header in queryHeaders:
            query.addSelect(header)
        query.addFrom(
            "stride_order_med as med left outer join stride_orderset_order_med as os on med.order_med_id = os.order_med_id"
        )
        # Grab order set links if they exist
        query.addFrom("stride_order_medmixinfo as mix")
        query.addWhere("med.order_med_id = mix.order_med_id")
        #query.addWhereEqual("med.medication_id", TEMPLATE_MEDICATION_ID );
        #query.addWhere("mix.line = 1"); # Just take the first item from a mix
        query.addWhere("freq_name not like '%%PRN'")
        # Ignore PRN orders
        if convOptions.startDate is not None:
            query.addWhereOp("ordering_date", ">=", convOptions.startDate)
        if convOptions.endDate is not None:
            query.addWhereOp("ordering_date", "<", convOptions.endDate)
        # Ordering by order_med_id (with mix.line) is what makes the per-mix
        # accumulation below valid: a mix's rows arrive contiguously
        query.addOrderBy("med.ordering_date, med.order_med_id, mix.line")

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        # Accumulate mixture components one item at a time
        mixByOrderMedId = dict()

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)
            orderMedId = rowModel["order_med_id"]
            if orderMedId not in mixByOrderMedId:  # New medication mix encountered.  Process any prior ones before moving on
                # NOTE(review): on the very first row this flushes an empty
                #   dict — presumably normalizeMixData yields nothing for it;
                #   confirm against its implementation.
                for normalizedModel in self.normalizeMixData(
                        rxcuiDataByMedId, mixByOrderMedId, convOptions):
                    yield normalizedModel
                    # Yield one row worth of data at a time to avoid having to keep the whole result set in memory

                mixByOrderMedId.clear()
                # Discard previously processed mixes so don't have a ton left in memory
                mixByOrderMedId[orderMedId] = list()
                # Prep for next mix
            mixByOrderMedId[orderMedId].append(rowModel)
            row = cursor.fetchone()
        # One more pass for remaining items
        for normalizedModel in self.normalizeMixData(rxcuiDataByMedId,
                                                     mixByOrderMedId,
                                                     convOptions):
            yield normalizedModel
            # Yield one row worth of data at a time to avoid having to keep the whole result set in memory

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
Example #25
0
    def copyPatientTemplate(self, patientData, templatePatientId, conn=None):
        """Clone a patient record from the given template patient.

        Shallow attributes are copied from the template, overridden by any
        values supplied in patientData.  Patient states and physician orders
        up to (and including) relative time zero are copied as well; anything
        after time zero is left behind, since that is expected to reflect
        real user interaction records.  Returns the new patient's ID.
        """
        # Only close connections we open ourselves; caller-owned ones stay open
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()
        try:
            templateRecord = DBUtil.loadRecordModelById("sim_patient",
                                                        templatePatientId,
                                                        conn=conn)
            # Drop the template's primary key so the insert generates a new one
            templateRecord.pop("sim_patient_id")
            # Caller-supplied fields take precedence over template values
            templateRecord.update(patientData)
            DBUtil.insertRow("sim_patient", templateRecord, conn=conn)
            patientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"),
                                       conn=conn)[0][0]

            # Clone the template's patient states up through relative time zero
            stateQuery = SQLQuery()
            stateQuery.addSelect("*")  # Copy all columns
            stateQuery.addFrom("sim_patient_state as sps")
            stateQuery.addWhereEqual("sps.sim_patient_id", templatePatientId)
            stateQuery.addWhereOp("relative_time_start", "<=", 0)
            stateQuery.addOrderBy("relative_time_start")
            stateTable = DBUtil.execute(stateQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            stateModels = modelListFromTable(stateTable)
            lastIndex = len(stateModels) - 1
            for index, stateModel in enumerate(stateModels):
                stateModel.pop("sim_patient_state_id")  # New row gets a fresh ID
                if index == lastIndex:
                    # Leave the final state open-ended so the simulation
                    # can continue forward from it
                    stateModel.pop("relative_time_end")
                stateModel["sim_patient_id"] = patientId
                DBUtil.insertRow("sim_patient_state", stateModel, conn=conn)

            # Clone the template's orders up through relative time zero
            orderQuery = SQLQuery()
            orderQuery.addSelect("*")
            orderQuery.addFrom("sim_patient_order as spo")
            orderQuery.addWhereEqual("sim_patient_id", templatePatientId)
            orderQuery.addWhereOp("relative_time_start", "<=", 0)
            orderQuery.addOrderBy("relative_time_start")
            orderTable = DBUtil.execute(orderQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            for orderModel in modelListFromTable(orderTable):
                orderModel.pop("sim_patient_order_id")
                orderModel["sim_patient_id"] = patientId
                DBUtil.insertRow("sim_patient_order", orderModel, conn=conn)

            conn.commit()  # Single transactional commit covering every copy step
            return patientId
        finally:
            if ownConnection:
                conn.close()
Example #26
0
from io import StringIO  # Was missing: StringIO is used below but never imported

from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery
from medinfo.db.DBUtil import NUMBER, BOOLEAN, STRING, DATETIME

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Load in top patient count diagnoses (we will only include these as binary covariates)
# Equivalent SQL:
#   select clinical_item_id, name, description, patient_count
#   from clinical_item
#   where patient_count > 0 and clinical_item_category_id = 2
DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("clinical_item_id")
DATA_QUERY.addSelect("name")
DATA_QUERY.addSelect("description")
DATA_QUERY.addSelect("patient_count")
DATA_QUERY.addFrom("clinical_item")
# Keep the literal condition and the parameterized equality separate rather than
# smuggling "patient_count > 0 and ..." through addWhereEqual's column argument.
DATA_QUERY.addWhere("patient_count > 0")
DATA_QUERY.addWhereEqual("clinical_item_category_id", 2)

top_admission_diagnoses = {}  # clinical_item_id -> description
top_admission_diagnoses_string = []

DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)

# Write out data to CSV; the with-block closes the handle so output is flushed.
with open("/Users/jwang/Desktop/Results/top_admission_diagnoses.csv", "w") as top_admission_diagnoses_f:
	top_admission_diagnoses_f.write("clinical_item_id,name,description,patient_count\n")
	for line in results:
		top_admission_diagnoses_f.write("{0},{1},{2},{3}\n".format(line[0], line[1], line[2], line[3]))
		top_admission_diagnoses[line[0]] = line[2]
Example #27
0
    def loadPatientOrders(self,
                          patientId,
                          currentTime,
                          loadActive=True,
                          conn=None):
        """Return the patient's orders entered by the given time point.

        loadActive - When True, only orders still active at currentTime are
            returned, grouped by category; otherwise results come back in
            chronologic sequence.  Set to None to load both active and
            inactive/completed orders.
        """
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()
        try:
            query = SQLQuery()
            for column in ("po.sim_patient_order_id",
                           "po.sim_user_id",
                           "po.sim_patient_id",
                           "po.sim_state_id",
                           "po.clinical_item_id",
                           "po.relative_time_start",
                           "po.relative_time_end",
                           "ci.name",
                           "ci.description",
                           "cic.source_table",
                           "cic.description as category_description"):
                query.addSelect(column)
            query.addFrom("sim_patient_order as po")
            query.addFrom("clinical_item as ci")
            query.addFrom("clinical_item_category as cic")
            # Join conditions linking orders to their item and category rows
            query.addWhere("po.clinical_item_id = ci.clinical_item_id")
            query.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            query.addWhereEqual("sim_patient_id", patientId)
            query.addWhereOp("relative_time_start", "<=", currentTime)

            if loadActive:
                # Active = no end time recorded, or end time still in the future
                query.openWhereOrClause()
                query.addWhere("relative_time_end is null")
                query.addWhereOp("relative_time_end", ">", currentTime)
                query.closeWhereOrClause()
            #elif loadActive is not None:    # Filter out active orders here.
            #    query.addWhereOp("relative_time_end","<=", currentTime)

            if loadActive:
                # Present currently active orders grouped by category
                sortColumns = ("cic.description", "ci.description",
                               "relative_time_start")
            else:
                # Historical view: chronologic order first
                sortColumns = ("relative_time_start", "cic.description",
                               "ci.description")
            for column in sortColumns:
                query.addOrderBy(column)

            resultTable = DBUtil.execute(query,
                                         includeColumnNames=True,
                                         conn=conn)
            return modelListFromTable(resultTable)
        finally:
            if ownConnection:
                conn.close()
    def loadNotes(self, patientId, currentTime, conn=None):
        """Load notes committed up to the given simulation time.

        patientId - sim_patient whose notes to retrieve.
        currentTime - Simulation-relative time; a note becomes visible once
            its enclosing state's start time plus the note's relative state
            time has been reached.
        conn - Optional existing DB connection; otherwise one is created and
            closed locally.
        Returns a list of note row models in chronologic unlock order.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()

            query.addSelect("sn.sim_note_id")
            query.addSelect("sps.sim_patient_id")
            # Link: note -> patient state -> patient
            query.addSelect("sn.sim_state_id")
            query.addSelect("sn.note_type_id")
            query.addSelect("sn.author_type_id")
            query.addSelect("sn.service_type_id")
            # Note's absolute simulation time = state start + offset within state
            query.addSelect(
                "(sps.relative_time_start + sn.relative_state_time) as relative_time"
            )
            query.addSelect("sn.content")

            query.addFrom("sim_note as sn")
            query.addFrom("sim_patient_state as sps")

            query.addWhere("sn.sim_state_id = sps.sim_state_id")
            query.addWhereEqual("sps.sim_patient_id", patientId)
            # Only unlock notes once traverse expected time
            query.addWhereOp(
                "(sps.relative_time_start + sn.relative_state_time)", "<=",
                currentTime)
            query.addOrderBy(
                "(sps.relative_time_start + sn.relative_state_time)")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
Example #29
0
    def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
        """Load all patient orders at the given relativeTime that 
        are due to yield results, but have not yet. 
        Include an estimate of time until results available.

        patientId - sim_patient whose orders to inspect.
        relativeTime - Simulation-relative time used both to filter orders and
            to compute the time_until_result estimate.
            NOTE(review): interpolated into the SQL with %d, so an integer
            value is assumed -- confirm against callers.
        conn - Optional existing DB connection; otherwise created/closed here.
        Returns row models sorted by soonest expected result.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect(
                "distinct po.clinical_item_id"
            )  # Distinct so don't report multiple times for panel orders
            query.addSelect("po.relative_time_start")
            query.addSelect("po.relative_time_end")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect(
                "sorm.turnaround_time"
            )  # Could have different turnaround times for single order if different sub results. Just report each.
            query.addSelect(
                "sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result"
                % relativeTime)  # Calculate time until expect result
            query.addFrom("sim_patient_order as po")
            query.addFrom("clinical_item as ci")
            query.addFrom("sim_order_result_map as sorm")
            # Join orders to items and to their result turnaround mapping
            query.addWhere("po.clinical_item_id = ci.clinical_item_id")
            query.addWhere("po.clinical_item_id = sorm.clinical_item_id")
            query.addWhereEqual("sim_patient_id", patientId)

            # Only catch orders up to the given relativeTime and not cancelled
            query.addWhereOp("relative_time_start", "<=", relativeTime)
            query.openWhereOrClause()
            query.addWhere("relative_time_end is null")
            query.addWhereOp("relative_time_end", ">", relativeTime)
            query.closeWhereOrClause()

            # Only PENDING orders, so don't report orders who results should already be available
            query.addWhereOp("sorm.turnaround_time + po.relative_time_start",
                             ">", relativeTime)

            query.addOrderBy("time_until_result")
            query.addOrderBy("relative_time_start")
            query.addOrderBy("ci.name")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
Example #30
0
class TestDBUtil(DBTestCase):
    def setUp(self):
        """Prepare state for test cases.

        Builds in-memory fixtures shared by the tests:
        - SCRIPT_FILE: SQL script that creates and populates the TestTypes table
        - DATA_FILE / DATA_ROWS: tab-delimited insert data and expected rows
        - MULTI_LINE_DATA_FILE / MULTI_LINE_DATA_ROWS: quoted/escaped variant
        - DATA_QUERY: query that reads back inserted rows for verification
        """
        DBTestCase.setUp(self)

        # SQL script exercising DDL, inserts, and every supported comment style
        self.SCRIPT_FILE = StringIO()
        self.SCRIPT_FILE.write(
            "# Create table to test on.  Also testing that comment tag is recognized\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("create table TestTypes\n")
        self.SCRIPT_FILE.write("(\n")
        self.SCRIPT_FILE.write("    TestTypes_id    serial,\n")
        self.SCRIPT_FILE.write(
            "    MyText          varchar(50),    /* Same as character varying, also test standard SQL comment tags */\n"
        )
        self.SCRIPT_FILE.write("    MyInteger       integer,\n")
        self.SCRIPT_FILE.write(
            "    MyReal          real,   -- Floating point number, also test standard SQL comment tag\n"
        )
        self.SCRIPT_FILE.write(
            "    MyDateTime      TIMESTAMP,   -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n"
        )
        self.SCRIPT_FILE.write("    MyYesNo         boolean\n")
        self.SCRIPT_FILE.write(");\n")
        self.SCRIPT_FILE.write(
            "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n"
        )
        # Should auto-create testtypes_testtypes_id_seq sequence
        self.SCRIPT_FILE.write(
            "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Sample Text',  123,123.45,'2004-09-08 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Joe Mama',     234,23.45,'1990-10-03 19:41:47.292000',False);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Mo Fo',        345,3.45,'2014-01-04 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write("\n")
        # Re-wrap so the script is readable from the beginning
        self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

        self.DATA_TABLE = "TestTypes"
        self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n"

        # Simple tab-delimited input rows, including None/boolean markers
        self.DATA_FILE = StringIO()
        self.DATA_FILE.write('100\t100.1\tNone\tATest\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n')
        # Skip None tag at end of line, test that white space won't get lost
        self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n')
        self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

        # Expected parsed rows matching DATA_FILE above
        self.DATA_ROWS = []
        self.DATA_ROWS.append([
            100,
            100.1,
            None,
            "ATest",
        ])
        self.DATA_ROWS.append([
            200,
            200.2,
            False,
            None,
        ])
        self.DATA_ROWS.append([
            300,
            None,
            True,
            "CTest",
        ])

        # Variant with quoted headers, embedded quotes, and escape sequences
        self.MULTI_LINE_DATA_FILE = StringIO()
        self.MULTI_LINE_DATA_FILE.write(
            'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n')
        self.MULTI_LINE_DATA_FILE.write(
            '100\t100.1\tNone\t"""A"" Test and ""more"""\n')
        self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n')
        self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR +
                                        '\t"C\\nTest"\t\n')
        self.MULTI_LINE_DATA_FILE = StringIO(
            self.MULTI_LINE_DATA_FILE.getvalue())

        # Expected parsed rows matching MULTI_LINE_DATA_FILE above
        self.MULTI_LINE_DATA_ROWS = []
        self.MULTI_LINE_DATA_ROWS.append([
            100,
            100.1,
            None,
            '"A" Test and "more"',
        ])
        self.MULTI_LINE_DATA_ROWS.append([
            200,
            200.2,
            False,
            None,
        ])
        self.MULTI_LINE_DATA_ROWS.append([
            300,
            None,
            True,
            'C\\nTest',
        ])

        # ID summary data to make it easier to verify stuff
        self.COL_NAMES = self.DATA_COLS.split()
        self.ID_COL = self.COL_NAMES[0]
        self.ID_DATA = []
        for row in self.DATA_ROWS:
            self.ID_DATA.append(row[0])

        # Build query to get update rows
        self.DATA_QUERY = SQLQuery()
        for col in self.COL_NAMES:
            self.DATA_QUERY.addSelect(col)
        self.DATA_QUERY.addFrom(self.DATA_TABLE)
        self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA)
        self.DATA_QUERY.addOrderBy(self.ID_COL)

    def tearDown(self):
        """Restore state from any setUp or test steps."""
        # Best-effort cleanup: the table may not exist if a test failed early,
        # so log the error rather than mask the original test failure.
        try:
            DBUtil.execute("drop table TestTypes")
        except Exception as err:
            log.warning(err)

        DBTestCase.tearDown(self)

    def test_runDBScript(self):
        """Run the setup DB script and spot-check the rows it created."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        # Script inserted exactly two rows with MyInteger above 200
        matches = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(matches))

        # Exercise the optional "includeColumnNames" parameter
        matches = DBUtil.execute(
            "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger < %s",
            (100, ), True)
        expectedHeader = [
            "testtypes_id", "mytext", "myinteger", "myreal", "mydatetime",
            "myyesno"
        ]
        # Compare header case-insensitively
        actualHeader = [name.lower() for name in matches[0]]
        self.assertEqual(expectedHeader, actualHeader)
        self.assertEqual(0, len(matches) - 1)  # Header row only, no data rows

    def test_runDBScript_commandline(self):
        """Same coverage as test_runDBScript, but through the command-line main()."""
        savedStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        DBUtil.main(["DBUtil.py", "--script", "-"])
        sys.stdin = savedStdin

        # Spot-check the rows the script created
        rows = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(rows))

        rows = DBUtil.execute(
            "select * from TestTypes where MyInteger < %s", (100, ))
        self.assertEqual(0, len(rows))

    def test_runDBScript_skipErrors(self):
        """Verify --skipErrors tolerates redundant DDL while the default mode raises.

        Runs the setup script three times: a clean first run, a second run
        where --skipErrors absorbs the redundant create-table errors, and a
        third run without the flag, which must raise.
        """
        origStdin = sys.stdin
        try:
            # First run: clean database, the script should succeed
            sys.stdin = self.SCRIPT_FILE
            DBUtil.main(["DBUtil.py", "--script", "-"])

            # Second run: redundant create table etc. generate errors, but
            # --skipErrors should let the script continue past them
            self.SCRIPT_FILE.seek(0)
            sys.stdin = self.SCRIPT_FILE
            DBUtil.main(["DBUtil.py", "--script", "--skipErrors", "-"])

            # Third run: the same redundant errors without --skipErrors
            # should propagate as an exception
            self.SCRIPT_FILE.seek(0)
            sys.stdin = self.SCRIPT_FILE
            with self.assertRaises(Exception):
                DBUtil.main(["DBUtil.py", "--script", "-"])
        finally:
            # Always restore stdin, even when an assertion fails mid-test
            # (the original restored it after the assert, leaking on failure)
            sys.stdin = origStdin

    def test_execute_commandline(self):
        """Run an update and select queries through the command-line main()
        interface, capturing stdout to verify results and the -c option."""
        # Run basic executes for both an update and a select query, but
        #   using the higher-level command-line "main" method interface

        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Select through main(); the scalar result is printed to stdout
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "select count(*) from TestTypes where MyInteger > 200", "-"
        ]
        DBUtil.main(argv)
        self.assertEqual(2, int(sys.stdout.getvalue()))
        sys.stdout = origStdout

        # Insert through main(); stdout output deliberately not asserted
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)",
            "-"
        ]
        DBUtil.main(argv)
        #self.assertEqual( 1, int(sys.stdout.getvalue()) )
        sys.stdout = origStdout

        # Re-run the count; the insert above should bump it from 2 to 3
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "select count(*) from TestTypes where MyInteger > 200", "-"
        ]
        DBUtil.main(argv)
        self.assertEqual(3, int(sys.stdout.getvalue()))
        sys.stdout = origStdout

        # Different test, includeColumnNames (-c): first output line is the header
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py", "-c",
            "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger > 200 and MyYesNo = True",
            "-"
        ]
        DBUtil.main(argv)
        sampleLines = sys.stdout.getvalue().split("\n")
        expected = [
            "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime",
            "MyYesNo"
        ]
        sampleColumns = sampleLines[0].split()
        for iCol in range(len(expected)):  # Case-insensitive comparison
            expected[iCol] = expected[iCol].lower()
            sampleColumns[iCol] = sampleColumns[iCol].lower()
        for iCol, col in enumerate(sampleColumns):
            self.assertEqual(expected[iCol], col)
        self.assertEqual(
            2 + 1 + 1, len(sampleLines)
        )  # 2 data lines + 1 column name line + 1 newline at end of output
        sys.stdout = origStdout

    def test_insertFile(self):
        """Insert a multi-line data file and verify the row count and contents."""
        # Setup assumed to work per test_runDBScript
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        idFile = StringIO()
        # Pass None for column names so they are taken from the file's first row
        DBUtil.insertFile(self.MULTI_LINE_DATA_FILE, "TestTypes", None, "\t",
                          idFile)

        # One generated ID line per inserted row
        self.assertEqual(len(self.MULTI_LINE_DATA_ROWS),
                         idFile.getvalue().count("\n"))
        self.assertEqual(self.MULTI_LINE_DATA_ROWS,
                         DBUtil.execute(self.DATA_QUERY))

    def test_insertFile_commandline(self):
        """Like test_insertFile, but through the command-line interface."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Setup per test_runDBScript

        idFile = StringIO()

        # Specify tab as the delimiter explicitly, not just any whitespace
        savedStdin = sys.stdin
        savedStdout = sys.stdout
        sys.stdin = self.MULTI_LINE_DATA_FILE
        sys.stdout = idFile
        DBUtil.main(["DBUtil.py", "-i-", "-d\\t", "-tTestTypes", "-o-"])
        sys.stdout = savedStdout
        sys.stdin = savedStdin

        # Three inserted rows, one generated ID line each
        self.assertEqual(3, idFile.getvalue().count("\n"))
        self.assertEqual(self.MULTI_LINE_DATA_ROWS,
                         DBUtil.execute(self.DATA_QUERY))

    def test_insertFile_skipErrors(self):
        """Verify insertFile's skipErrors (-e) option.

        Without the flag, malformed rows abort the whole load (zero inserts);
        with it, the one valid row still gets through.  The app is expected
        to print error/warning messages during this test.
        """
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"]

        # Without skipErrors, the bogus rows should make the whole load raise
        idFile = StringIO()
        origStdin = sys.stdin
        origStdout = sys.stdout
        sys.stdin = self._bogusDataFile()
        sys.stdout = idFile
        argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"]
        argv.extend(columnNames)
        try:
            with self.assertRaises(Exception):
                DBUtil.main(argv)
        finally:
            # Restore streams even when the assertion fails
            sys.stdout = origStdout
            sys.stdin = origStdin

        # Expect no rows successfully inserted since errors in input
        self.assertEqual(0, idFile.getvalue().count("\n"))
        results = DBUtil.execute(
            "select count(*) from TestTypes where MyText like %s", ("%Test", ))
        self.assertEqual(0, results[0][0])

        # With skipErrors (-e), the single valid row should still be inserted
        idFile = StringIO()
        origStdin = sys.stdin
        origStdout = sys.stdout
        sys.stdin = self._bogusDataFile()
        sys.stdout = idFile
        argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-",
                "-e"]  # -e option skipsErrors
        argv.extend(columnNames)
        try:
            DBUtil.main(argv)
        finally:
            sys.stdout = origStdout
            sys.stdin = origStdin

        # Still expect 1 row to get through successfully, despite other invalid input
        self.assertEqual(1, idFile.getvalue().count("\n"))
        results = DBUtil.execute(
            "select count(*) from TestTypes where MyText like %s", ("%Test", ))
        self.assertEqual(1, results[0][0])

    def _bogusDataFile(self):
        """Build a fresh input file with two malformed rows and one valid row."""
        dataFile = StringIO()
        dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
        dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
        dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
        return StringIO(dataFile.getvalue())

    def test_insertFile_dateParsing(self):
        """Verify insertFile parses several date string formats into proper
        datetime values for columns listed in dateColFormats."""
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyInteger", "MyText", "MyDateTime"]

        # Each date string appears both as plain text (MyText) and as a date
        # column (MyDateTime) so parsed vs. raw forms can be compared
        dataFile = StringIO()
        dataFile.write('''-1\t"12/11/2010"\t"12/11/2010"\n''')
        dataFile.write(
            '''-2\t"2013-04-15 13:45:21"\t"2013-04-15 13:45:21"\n''')
        dataFile.write(
            '''-3\t"2003-04-15 10:45:21"\t"2003-04-15 10:45:21"\n''')
        dataFile.write('''-4\t"4/11/12 6:20"\t"4/11/12 6:20"\n''')
        dataFile = StringIO(dataFile.getvalue())

        dateColFormats = {
            "myDateTime": None
        }  # Deliberately change capitalization to ensure robustness
        DBUtil.insertFile(dataFile,
                          tableName,
                          columnNames,
                          dateColFormats=dateColFormats)

        verifyQuery = \
            """select MyInteger, MyText, MyDateTime
            from TestTypes
            where MyInteger < 0
            order by MyInteger desc
            """

        expectedData = \
            [   [   -1, "12/11/2010", datetime(2010,12,11)  ],
                [   -2, "2013-04-15 13:45:21", datetime(2013,4,15,13,45,21) ],
                [   -3, "2003-04-15 10:45:21", datetime(2003,4,15,10,45,21) ],
                [   -4, "4/11/12 6:20", datetime(2012,4,11,6,20) ],
            ]

        # Verify rows inserted with properly parsed dates
        results = DBUtil.execute(verifyQuery)
        self.assertEqual(expectedData, results)

    def test_insertFile_escapeStrings(self):
        """Verify the escapeStrings option of insertFile: non-ASCII bytes in
        input text are stored as their escaped representation."""
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyInteger", "MyText"]

        # Row -2 contains a raw \xae byte to exercise the escaping path
        dataFile = StringIO()
        dataFile.write('''-1\t"A"\n''')
        dataFile.write('''-2\t"B\xaeb"\n''')
        dataFile.write('''-3\t"C"\n''')
        dataFile.write('''-4\tD\n''')
        dataFile = StringIO(dataFile.getvalue())

        DBUtil.insertFile(dataFile, tableName, columnNames, escapeStrings=True)

        verifyQuery = \
            """select MyInteger, MyText
            from TestTypes
            where MyInteger < 0
            order by MyInteger desc
            """

        expectedData = \
            [   [   -1, "A"],
                [   -2, "B\\xaeb"],
                [   -3, "C"],
                [   -4, "D"],
            ]

        # Verify rows inserted with escaped string values
        results = DBUtil.execute(verifyQuery)
        self.assertEqual(expectedData, results)

    def test_identityQuery(self):
        """identityQuery should report the last generated sequence value."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        # Baseline: highest ID created by the setup script
        lastSeq = DBUtil.execute(
            "select max(TestTypes_id) from TestTypes")[0][0]

        conn = DBUtil.connection()
        try:
            cur = conn.cursor()
            cur.execute(
                "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)"
            )

            # Sequence should have advanced by exactly one after the insert
            cur.execute(DBUtil.identityQuery("TestTypes"))
            self.assertEqual(lastSeq + 1, cur.fetchone()[0])

            # Cross-check against the ID actually stored on the new row
            cur.execute(
                "select TestTypes_id from TestTypes where MyText = 'Another' and MyInteger = 255"
            )
            self.assertEqual(lastSeq + 1, cur.fetchone()[0])
        finally:
            conn.close()

    def test_nullCheck(self):
        """Verify NULL handling: rows inserted with omitted columns are found
        by "is null" and excluded by "is not null" queries."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        conn = DBUtil.connection()
        try:
            # One row with MyReal/MyDateTime omitted (NULL), one fully populated
            DBUtil.execute(
                "insert into TestTypes (MyText,MyInteger) values ('Test With Null', 255)",
                conn=conn)
            DBUtil.execute(
                "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime) values ('Test With Not Null', 255, 1.23, '2005-03-06')",
                conn=conn)

            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyReal is null",
                conn=conn)
            self.assertEqual('Test With Null', result[0][0])
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyReal is not null",
                conn=conn)
            self.assertEqual('Test With Not Null', result[0][0])

            # Would not work with MySQL if used TIMESTAMP data type.  Should be DATETIME.  (TIMESTAMP tries to auto-fill values, so no nulls allowed?)
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyDateTime is null",
                conn=conn)
            self.assertEqual('Test With Null', result[0][0])
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyDateTime is not null",
                conn=conn)
            self.assertEqual('Test With Not Null', result[0][0])

        finally:
            conn.close()

    def test_findOrInsertItem(self):
        """Exercise DBUtil.findOrInsertItem through its main scenarios:
        insert on a miss, find on a hit, optional insert-only data,
        retrieval of an alternate column, and forcing an update of an
        existing row.
        """
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        lookupDict = {}
        extraDict = {}

        lookupDict["TestTypes_id"] = +123
        log.debug("Insert a new item using default params")
        (value, inserted) = DBUtil.findOrInsertItem("TestTypes", lookupDict)
        self.assertEqual(+123, value)
        self.assertEqual(True, inserted)

        log.debug("Find the existing item")
        (value, inserted) = DBUtil.findOrInsertItem("TestTypes", lookupDict)
        self.assertEqual(+123, value)
        self.assertEqual(False, inserted)

        # Insert data should be ignored when the search already finds a row
        extraDict["TestTypes_id"] = +456
        log.debug("Find existing item, with optional insert data")
        (value, inserted) = DBUtil.findOrInsertItem(
            "TestTypes", lookupDict, extraDict)
        self.assertEqual(+123, value)
        self.assertEqual(False, inserted)

        lookupDict["TestTypes_id"] = +789
        extraDict["TestTypes_id"] = +789
        extraDict["MyInteger"] = 123
        log.debug("Insert a new item with actual data")
        (value, inserted) = DBUtil.findOrInsertItem(
            "TestTypes", lookupDict, extraDict)
        self.assertEqual(+789, value)
        self.assertEqual(True, inserted)

        # Retrieving a column that was never populated should yield None
        lookupDict["TestTypes_id"] = +234
        extraDict["TestTypes_id"] = +234
        log.debug("Retrieve a different column")
        (value, inserted) = DBUtil.findOrInsertItem(
            "TestTypes", lookupDict, extraDict, retrieveCol="MyText")
        self.assertEqual(None, value)
        self.assertEqual(True, inserted)

        lookupDict["TestTypes_id"] = +345
        extraDict["TestTypes_id"] = +345
        extraDict["MyText"] = "testText"
        log.debug("Insert and retrieve a different column")
        (value, inserted) = DBUtil.findOrInsertItem(
            "TestTypes", lookupDict, extraDict, retrieveCol="MyText")
        self.assertEqual("testText", value)
        self.assertEqual(True, inserted)

        # Existing row should win over new insert data unless forced
        extraDict["MyText"] = "newText"
        log.debug(
            "Try inserting a different value under an existing row.  Should NOT work"
        )
        (value, inserted) = DBUtil.findOrInsertItem(
            "TestTypes", lookupDict, extraDict, retrieveCol="MyText")
        self.assertEqual("testText", value)
        self.assertEqual(False, inserted)

        log.debug(
            "Try inserting a different value under an existing row, but force the update"
        )
        extraDict["MyText"] = "newText"
        (value, inserted) = DBUtil.findOrInsertItem(
            "TestTypes", lookupDict, extraDict, retrieveCol="MyText",
            forceUpdate=True)
        self.assertEqual("newText", value)
        self.assertEqual(False, inserted)

    def test_updateFromFile(self):
        """Seed rows with only key values, then bulk-update them from a
        tab-delimited data file and verify the final table contents.
        """
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Insert some blank data first to update; one row per expected ID
        insertStatement = "insert into TestTypes (" + self.ID_COL + ") values (%s)"
        for keyValue in self.ID_DATA:
            DBUtil.execute(insertStatement, (keyValue, ))

        # Negative test case: before the update, contents should NOT match
        self.assertNotEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

        # Now do the actual update from the file
        DBUtil.updateFromFile(
            self.DATA_FILE, self.DATA_TABLE, self.COL_NAMES, delim="\t")

        # Positive test case: table now reflects the file's data
        self.assertEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

    def test_updateFromFile_commandline(self):
        """Like test_updateFromFile, but driven through the DBUtil.main
        command-line interface with the update data fed via stdin.
        Second pass exercises multi-column keys (the -n option).
        """
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Insert some blank data first to update; one row per expected ID
        insertStatement = "insert into TestTypes (" + self.ID_COL + ") values (%s)"
        for keyValue in self.ID_DATA:
            DBUtil.execute(insertStatement, (keyValue, ))

        # Negative test case
        self.assertNotEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

        # Build the update input with column names as a header row,
        # then run it through the command-line entry point via stdin
        stdinFile = StringIO(self.DATA_COLS + self.DATA_FILE.getvalue())
        sys.stdin = stdinFile
        DBUtil.main(["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-d\\t"])

        # Verify positive results
        self.assertEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

        ########################################################
        # Repeat test but data file will use more than one key column (adding MyText)
        # Further note that MyText is used as both a key column to look up the row to update
        #   and as a value column to modify
        updateLines = (
            "MyInteger\tMyText\tMyText\tMyReal\tMyYesNo\n",
            "100\tATest\tAAA\tNone\t" + TRUE_STR + "\t\n",
            "200\tNone\tBBB\t222.2\tNone\t\n",
            "300\tCTest\tNone\t333.3\t" + TRUE_STR + "\t\n",
        )
        stdinFile = StringIO("".join(updateLines))

        # Expected results after this update
        # (the row whose key value is null cannot be matched and stays unchanged)
        self.DATA_ROWS = [
            [100, None, True, "AAA"],
            [200, 200.2, False, None],
            [300, 333.3, True, None],
        ]

        # Negative test case
        self.assertNotEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

        # Now do the actual update from the file, but with an extra parameter specifying 2 key columns
        sys.stdin = stdinFile
        DBUtil.main(["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-n2"])

        # Verify positive results
        self.assertEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

    def test_deleteRows(self):
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        query = "select count(*) from TestTypes;"

        # Insert some test data to delete
        tableName = "TestTypes"
        columnNames = self.DATA_COLS.split()

        idFile = StringIO()
        DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile)
        idValues = idFile.getvalue().split()

        # Count up rows before and after delete
        initialCount = DBUtil.execute(query)[0][0]
        DBUtil.deleteRows("TestTypes", idValues)
        afterCount = DBUtil.execute(query)[0][0]

        self.assertEqual(initialCount - len(idValues), afterCount)

        # Reinsert the test data to try deleting them by a non-default Id column
        idFile = StringIO()
        DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile)

        nonDefaultIds = [100, 200]
        initialCount = DBUtil.execute(query)[0][0]
        DBUtil.deleteRows("TestTypes", nonDefaultIds, "MyInteger")
        afterCount = DBUtil.execute(query)[0][0]