예제 #1
0
    def queryItems(self, options, outputFile):
        """Count medication orders per medication and route, then send the
        result rows as tab-delimited output to outputFile.

        Attributes read from options:
            pauseSeconds: converted with float(); seconds slept after each
                formatted row.
            descriptionPrefix: when truthy, only rows whose om.med_description
                starts with this prefix are counted.
            medRoutes: when truthy, comma-separated om.med_route values to
                restrict the query to.
        """
        rowDelay = float(options.pauseSeconds)

        medQuery = SQLQuery()
        medQuery.addSelect("count(order_med_id_coded) as nOrders")
        medQuery.addSelect("om.med_route, om.medication_id, om.med_description")
        medQuery.addFrom("starr_datalake2018.order_med as om")

        prefix = options.descriptionPrefix
        if prefix:
            # Trailing wildcard turns the "like" comparison into a prefix search
            medQuery.addWhereOp("om.med_description", "like", prefix + "%%")

        routes = options.medRoutes
        if routes:
            medQuery.addWhereIn("om.med_route", routes.split(","))

        medQuery.addGroupBy("om.medication_id, om.med_description, om.med_route")
        medQuery.addOrderBy("nOrders desc, om.med_description")

        writer = TextResultsFormatter(outputFile)
        progress = ProgressDots()
        # Column names are requested along with the data rows
        resultRows = DBUtil.execute(medQuery,
                                    includeColumnNames=True,
                                    connFactory=self.connFactory)
        for resultRow in resultRows:
            writer.formatTuple(resultRow)
            time.sleep(rowDelay)
            progress.update()
        progress.printStatus()
예제 #2
0
    def queryItems(self, options, outputFile):
        """List clinical items joined to their category description, then send
        the result rows as tab-delimited output to outputFile.

        Attributes read from options:
            pauseSeconds: converted with float(); seconds slept after each
                formatted row.
            itemPrefix: when truthy, only items whose ci.description starts
                with this prefix are returned.
            categoryNames: when truthy, comma-separated cic.description values
                to restrict the query to.
        """
        rowDelay = float(options.pauseSeconds)

        itemQuery = SQLQuery()
        itemQuery.addSelect(
            "cic.description, ci.clinical_item_id, ci.name, ci.description")
        itemQuery.addFrom("clinical_item_category as cic")
        itemQuery.addFrom("clinical_item as ci")
        # Join each item to its category
        itemQuery.addWhere(
            "cic.clinical_item_category_id = ci.clinical_item_category_id")

        prefix = options.itemPrefix
        if prefix:
            # Trailing wildcard turns the "like" comparison into a prefix search
            itemQuery.addWhereOp("ci.description", "like", prefix + "%%")

        categories = options.categoryNames
        if categories:
            itemQuery.addWhereIn("cic.description", categories.split(","))

        itemQuery.addOrderBy(
            "cic.description, ci.name, ci.description, ci.clinical_item_id")

        writer = TextResultsFormatter(outputFile)
        progress = ProgressDots()
        # Column names are requested along with the data rows
        resultRows = DBUtil.execute(itemQuery,
                                    includeColumnNames=True,
                                    connFactory=self.connFactory)
        for resultRow in resultRows:
            writer.formatTuple(resultRow)
            time.sleep(rowDelay)
            progress.update()
        progress.printStatus()
예제 #3
0
def parseLabResultsFile(labFile):
    """Load numeric lab results from labFile, grouped by patient and lab base name.

    Rows are read with TabDictReader.  A row is kept only when its
    "ord_num_value" is neither None nor NULL_STRING and, once converted to
    float, is below LAB_SENTINEL_VALUE.  Kept rows are updated in place:
    "pat_id" (and a new "patient_id" key) hold the int patient ID,
    "ord_num_value" the float value and "result_time" the value returned by
    DBUtil.parseDateValue.

    Returns a dictionary: patientId -> lab base name -> list of row dictionaries.
    """
    log.info("Parse lab results file")
    progress = ProgressDots()
    labsByBaseNameByPatientId = {}
    for record in TabDictReader(labFile):
        rawValue = record["ord_num_value"]
        if rawValue is not None and rawValue != NULL_STRING:
            patientId = int(record["pat_id"])
            baseName = record["base_name"]
            numericValue = float(rawValue)
            parsedTime = DBUtil.parseDateValue(record["result_time"])

            # Values at or above the sentinel look like placeholders, so skip them
            if numericValue < LAB_SENTINEL_VALUE:
                record["pat_id"] = patientId
                record["patient_id"] = patientId
                record["ord_num_value"] = numericValue
                record["result_time"] = parsedTime

                labsByBaseName = labsByBaseNameByPatientId.setdefault(patientId, {})
                labsByBaseName.setdefault(baseName, []).append(record)

        progress.update()
    progress.printStatus()
    return labsByBaseNameByPatientId
예제 #4
0
def addLabFeatures(labsByBaseNameByPatientId, patientById, colNames, indexItemBaseName, labBaseNames, labPreTime, labPostTime):
    """Add summary features for each lab of interest to every patient record.

    For each patient, the earliest result of the index lab (indexItemBaseName)
    defines the index time, stored as patient["index_time"].  Patients without
    any index lab result are left untouched.  For every name in labBaseNames
    the keys "<name>.min", ".max", ".median", ".mean", ".std", ".first",
    ".last" and ".proximate" are set on the patient:
      - min/max/median/mean/std/first/last summarize the "ord_num_value" of
        results whose "result_time" t satisfies
        indexTime+labPreTime <= t < indexTime+labPostTime;
      - proximate is the value of the result closest in time to the index
        time, regardless of the window.
    Keys stay None when there is nothing to summarize.

    Parameters:
        labsByBaseNameByPatientId: patientId -> lab base name -> list of
            result dictionaries with "result_time" and "ord_num_value".
        patientById: patientId -> patient dictionary, updated in place.
        colNames: list extended in place with colsFromLabBaseNames(labBaseNames).
        indexItemBaseName: base name of the index lab.
        labBaseNames: base names of the labs to summarize.
        labPreTime, labPostTime: offsets added to the index time to form the
            window (assumed to be timedelta-like -- TODO confirm with caller).
    """
    log.info("Sort lab results by result time for each patient and find items within specified time period to aggregate")
    prog = ProgressDots()
    statSuffixes = ("min", "max", "median", "mean", "std", "first", "last", "proximate")

    def resultTimeOf(labResult):
        return labResult["result_time"]

    # items() rather than the Python-2-only iteritems() so this runs on either version
    for patientId, labsByBaseName in labsByBaseNameByPatientId.items():
        indexResults = labsByBaseName.get(indexItemBaseName)
        if not indexResults:
            # No index item found; should not happen if patients were pre-screened
            continue

        # Index time is the earliest result of the index item
        indexTime = min(resultTimeOf(labResult) for labResult in indexResults)

        patient = patientById[patientId]
        patient["index_time"] = indexTime

        preTimeLimit = indexTime + labPreTime
        postTimeLimit = indexTime + labPostTime

        for labBaseName in labBaseNames:
            # Default every feature to None; not all patients have all labs checked
            for suffix in statSuffixes:
                patient["%s.%s" % (labBaseName, suffix)] = None

            labResults = labsByBaseName.get(labBaseName)
            if labResults:
                # Sort a copy chronologically so "first"/"last" really are the
                # earliest/latest values in the window, whatever the input order
                orderedResults = sorted(labResults, key=resultTimeOf)

                valueList = [
                    labResult["ord_num_value"]
                    for labResult in orderedResults
                    if preTimeLimit <= labResult["result_time"] < postTimeLimit
                ]
                if valueList:
                    patient["%s.min" % labBaseName] = np.min(valueList)
                    patient["%s.max" % labBaseName] = np.max(valueList)
                    patient["%s.median" % labBaseName] = np.median(valueList)
                    patient["%s.mean" % labBaseName] = np.mean(valueList)
                    patient["%s.std" % labBaseName] = np.std(valueList)
                    patient["%s.first" % labBaseName] = valueList[0]
                    patient["%s.last" % labBaseName] = valueList[-1]

                # Result closest in time to the index item (earliest one wins ties)
                proximateItem = min(
                    orderedResults,
                    key=lambda labResult: abs(labResult["result_time"] - indexTime))
                patient["%s.proximate" % labBaseName] = proximateItem["ord_num_value"]

            prog.update()
    colNames.extend(colsFromLabBaseNames(labBaseNames))
    prog.printStatus()
예제 #5
0
def main(argv):
    """Main method, callable from command line.

    Polls (via pidExists) until the process given by -p/--pid is no longer
    found, sleeping -i/--interval seconds between checks, then launches the
    -c/--commandStr command as a child process without waiting for it.
    Progress and timing messages go to stderr.  If either the PID or the
    command string is missing, prints the usage help and exits with status -1.

    argv: full argument vector; argv[0] is the program name.
    """
    # Local import: only needed here to tokenize the command string
    import shlex

    usageStr = "usage: %prog -p <pid> -c <commandStr>\n"
    parser = OptionParser(usage=usageStr)
    parser.add_option(
        "-p",
        "--pid",
        dest="pid",
        help=
        "Process ID to monitor.  As soon as it is no longer found by Unix signal (os.kill(pid,0)), will proceed to execute the given command"
    )
    parser.add_option(
        "-i",
        "--interval",
        dest="interval",
        default="1",
        help=
        "How many seconds to wait before checking if the PID is still active.  Default to 1 second."
    )
    parser.add_option(
        "-c",
        "--commandStr",
        dest="commandStr",
        help=
        "Command string to execute after the specified PID is no longer found.  Will be split shell-style into program and arguments, then passed to subprocess"
    )
    (options, args) = parser.parse_args(argv[1:])

    # sys.stderr.write instead of the Python-2-only "print >>" statement
    sys.stderr.write("Starting: " + " ".join(argv) + "\n")
    startTime = time.time()

    if not (options.pid and options.commandStr):
        parser.print_help()
        sys.exit(-1)

    pid = int(options.pid)
    interval = float(options.interval)

    prog = ProgressDots(60, 1, "intervals")
    while pidExists(pid):
        time.sleep(interval)
        prog.update()
    prog.printStatus()

    sys.stderr.write("Executing:  %s\n" % options.commandStr)
    # On POSIX a plain string passed to Popen is taken as the program name
    # alone, so a command with arguments must be tokenized first.
    process = subprocess.Popen(shlex.split(options.commandStr))
    sys.stderr.write("Started process:  %s\n" % process.pid)

    sys.stderr.write("%.3f seconds to complete\n" % (time.time() - startTime))
예제 #6
0
def main(argv):
    """Calculate and store result statistics for every sim_result name that
    has no order_result_stat row yet.

    All inserts share one connection, committed once after the last name;
    the connection is closed whether or not an error occurs.
    """
    processor = STRIDEOrderResultsConversion()

    conn = DBUtil.connection()
    try:
        # Result names to look for that are not already in the calculated stats
        pendingNames = DBUtil.execute("select name from sim_result    except    select base_name from order_result_stat", conn=conn)
        progress = ProgressDots(big=1, small=1, total=len(pendingNames))
        for nameRow in pendingNames:
            baseName = nameRow[0]
            print("Calculating Stats for %s" % baseName, file=sys.stderr)
            DBUtil.insertRow(
                "order_result_stat",
                processor.calculateResultStats(baseName, conn=conn),
                conn=conn)
            progress.update()
        progress.printStatus()
        conn.commit()
    finally:
        conn.close()
예제 #7
0
def parseClinicalItemFile(itemFile, patientIdCol="patient_id", timeCol="item_date"):
    """Collect clinical item times per patient from a file read with TabDictReader.

    Parameters:
        itemFile: source handed to TabDictReader.
        patientIdCol: column holding the patient ID (converted with int()).
        timeCol: column holding the item time (converted with
            DBUtil.parseDateValue).

    Returns a dictionary: patientId -> list of item times, in file order.
    """
    progress = ProgressDots()
    itemTimesByPatientId = {}
    for record in TabDictReader(itemFile):
        patientId = int(record[patientIdCol])
        itemTime = DBUtil.parseDateValue(record[timeCol])

        # Store the converted values back on the row as well
        record[patientIdCol] = patientId
        record[timeCol] = itemTime

        itemTimesByPatientId.setdefault(patientId, []).append(itemTime)

        progress.update()
    progress.printStatus()

    return itemTimesByPatientId
예제 #8
0
def main_quickTest(argv):
    """Quick manual check: time model loading and weight generation, then
    walk the generated weights three times under a progress counter.

    argv[1] is the model filename passed to TopicModel.loadModelAndDocCounts.
    """
    modelFilename = argv[1]
    modeler = TopicModel()

    timer = time.time()
    # The document counts are returned alongside the model but not used here
    (model, docCountByWordId) = modeler.loadModelAndDocCounts(modelFilename)
    timer = time.time() - timer
    log.info("%.2f seconds to load", timer)

    timer = time.time()
    weightByItemIdByTopicId = modeler.generateWeightByItemIdByTopicId(
        model, 100)
    timer = time.time() - timer
    log.info("%.2f seconds to generate weights", timer)

    # range / values() instead of the Python-2-only xrange / iteritems,
    # so the check runs on Python 2 and 3 alike
    for _ in range(3):
        prog = ProgressDots()
        for weightByItemId in weightByItemIdByTopicId.values():
            for _itemId in weightByItemId:
                prog.update()
        prog.printStatus()
    """
예제 #9
0
def queryPatientEpisodes(outputFile, extractor):
    """Query the patient episode cohort, attach encounter details and write
    the episodes as tab-delimited text to outputFile.

    Phase 1 selects (patient, encounter, transfer-in time, transfer-out time)
    rows from stride_adt for patients having a category-161 clinical item
    described as 'Tt Pamf Med (Primary)'.  Phase 2 looks up payor title and
    vital signs in stride_patient_encounter for those encounters; when an
    encounter ID occurs in several episodes, only the first one is updated.

    Parameters:
        outputFile: destination handed to TextResultsFormatter.
        extractor: not used by this function; kept so the signature stays
            unchanged for callers.

    Returns the list of RowItemModel episodes, in query order.
    """
    # NOTE(review): the message mentions "Tt Med Univ (Primary)" as well, but
    # the query below only filters on 'Tt Pamf Med (Primary)' -- confirm intent.
    log.info(
        "Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)"
    )

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = """
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            adt1.pat_anon_id in
            (select patient_id from patient_item inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id where clinical_item.clinical_item_category_id = 161 AND clinical_item.description = '%s') 
        and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id
        """ % ("Tt Pamf Med (Primary)")

        print(cohortQuery, file=sys.stderr)
        cursor.execute(cohortQuery)

        patientEpisodes = []
        patientEpisodeById = {}

        # Build the basic episode records: patient ID, encounter ID,
        #   ED presentation time and discharge time
        prog = ProgressDots()
        for cohortRow in iter(cursor.fetchone, None):
            (patientId, encounterId, edAdmitTime, dischargeTime) = cohortRow
            episode = RowItemModel({
                "patient_id": patientId,
                "edAdmitTime": edAdmitTime,
                "dischargeTime": dischargeTime,
                "encounter_id": encounterId,
                # Default encounter data to null in case can't find it later
                "payorTitle": None,
                "bpSystolic": None,
                "bpDiastolic": None,
                "temperature": None,
                "pulse": None,
                "respirations": None,
            })
            patientEpisodes.append(episode)
            # First episode seen for an encounter is the one indexed
            patientEpisodeById.setdefault(episode["encounter_id"], episode)

            prog.update()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        encounterQuery = SQLQuery()
        for columnName in ("pat_id", "pat_enc_csn_id", "title", "bp_systolic",
                           "bp_diastolic", "temperature", "pulse",
                           "respirations"):
            encounterQuery.addSelect(columnName)
        encounterQuery.addFrom("stride_patient_encounter")
        encounterQuery.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(encounterQuery), encounterQuery.params)
        for encounterRow in iter(cursor.fetchone, None):
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
             temperature, pulse, respirations) = encounterRow
            if encounterId not in patientEpisodeById:
                continue
            episode = patientEpisodeById[encounterId]
            if episode["payorTitle"] is None:
                # Single encounters may have multiple payors to track
                episode["payorTitle"] = set()
            episode["payorTitle"].add(payorTitle)
            episode["bpSystolic"] = bpSystolic
            episode["bpDiastolic"] = bpDiastolic
            episode["temperature"] = temperature
            episode["pulse"] = pulse
            episode["respirations"] = respirations

        # Drop results as tab-delimited text output
        TextResultsFormatter(outputFile).formatResultDicts(
            patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
예제 #10
0
# Select the second (index one) form (the first form is a search query box)
#br.select_form(nr=1)

# User credentials, single login and then store a cookie
#br.form['username'] = '******'
#br.form['password'] = '******'

# Login
#br.submit()

# Base URL to query for, with parameters for subsets
BASE_URL = 'http://www.samhsa.gov/medication-assisted-treatment/physician-program-data/treatment-physician-locator?field_bup_physician_us_state_value=All&page=%s'
BASE_OUTPUT_FILENAME = 'buprenorphinePhysicians.%s.htm'
N_PAGES = 641
#N_PAGES = 10

# Download every result page and save it verbatim to a local file named after its page index
progress = ProgressDots(big=100, small=2)
for iPage in range(N_PAGES):
    sourceURL = BASE_URL % (iPage)
    sourceFile = br.open(sourceURL)
    try:
        # Just store whole file in memory for simplicity
        sourceContent = sourceFile.read()
    finally:
        # Release the response even if the read fails part-way
        sourceFile.close()

    localFilename = BASE_OUTPUT_FILENAME % (iPage)
    # Binary mode stores the downloaded bytes unchanged (no newline translation);
    # the with-block closes the file even if the write fails
    with open(localFilename, "wb") as localFile:
        localFile.write(sourceContent)

    progress.update()
progress.printStatus()
예제 #11
0
        for procCode, description, count in subResults:
            print str.join(
                "\t", [procCode, description, lastBaseName, lastComponentName])

        orderProcIds.clear()
        # Reset once done
        nIds = 0

    lastBaseName = baseName
    lastComponentName = componentName
    if nIds < SAMPLE_SIZE:  # Don't track all, just the first sample
        orderProcIds.add(orderProcId)
        nIds += 1

    row = cursor.fetchone()
    prog.update()

# Final base name to query off of:
# the loop above stopped with a last batch of sampled order_proc_ids still pending,
# so run the same summary query once more for that batch.
# One placeholder is generated per tracked ID (nIds); the IDs themselves are passed as query parameters.
subQuery = """
	select proc_code, description, count(order_proc_id)
	from stride_order_proc
	where order_proc_id in (%s)
	group by proc_code, description
	""" % generatePlaceholders(nIds)
subResults = DBUtil.execute(subQuery, tuple(orderProcIds), conn=conn)
# One tab-delimited output line per (proc_code, description), tagged with the last base / component name seen.
# NOTE(review): print statement syntax below is Python 2 only.
for procCode, description, count in subResults:
    print str.join("\t",
                   [procCode, description, lastBaseName, lastComponentName])

prog.printStatus()
예제 #12
0
def main(argv):
    """Print association statistics for prescriptions vs. pairs of diagnoses.

    argv[1] names a tab-delimited file with one header row.  Columns are read
    by position: Rx, Dx1, Dx2, Rx+Dx1+Dx2 count, Rx count, Dx1+Dx2 count and
    total count.  The file is read twice:
      1. Rows where Dx1 == Dx2 supply the single-diagnosis baselines
         (count per (Rx, Dx) and count per Dx).
      2. Every row gets expected counts, an observed-vs-expected chi-square
         p-value and the ContingencyStats values listed in statIds.
    One tab-delimited line per row is printed to stdout after a header line.
    A statistic that raises ZeroDivisionError is printed as None.

    Raises KeyError if a row refers to an (Rx, Dx) or Dx that has no
    Dx1 == Dx2 baseline row in the file.
    """
    associationFilename = argv[1]

    # Initial pass to get single diagnosis baselines by looking for 2x Dx combos where Dx1 = Dx2
    countByDx = dict()
    countByRxDx = dict()
    with open(associationFilename) as associationFile:
        associationFile.readline()  # Dump header row
        for line in associationFile:
            # Drop only the line terminator; a plain strip() could also eat
            # leading tabs and shift the columns
            chunks = line.rstrip("\r\n").split("\t")
            rx = chunks[0]
            dx1 = chunks[1]
            dx2 = chunks[2]
            if dx1 == dx2:
                countByRxDx[(rx, dx1)] = int(chunks[3])
                countByDx[dx1] = int(chunks[5])

    # Second pass to now do stats for combo diagnoses, to see if prescription shows difference between 1 or both diagnoses
    statIds = (
        "P-Fisher",
        "P-YatesChi2",
        "oddsRatio",
        "relativeRisk",
        "interest",
        "LR+",
        "LR-",
        "sensitivity",
        "specificity",
        "PPV",
        "NPV",
    )
    headerCols = [
        "Rx",
        "Dx1",
        "Dx2",
        "RxDx1Dx2Count",
        "RxDx1Count",
        "RxDx2Count",
        "RxCount",
        "Dx1Dx2Count",
        "Total",
        "E(RxDx1Dx2Count)",
        "E(RxDx2Dx1Count)",
        "E(Dx1Dx2Count)",
        "P-Chi2-Obs:Exp",
    ]
    headerCols.extend(statIds)
    print("\t".join(headerCols))

    with open(associationFilename) as associationFile:
        associationFile.readline()  # Dump header row
        progress = ProgressDots()
        for line in associationFile:
            chunks = line.rstrip("\r\n").split("\t")
            rx = chunks[0]
            dx1 = chunks[1]
            dx2 = chunks[2]
            rxDx1Dx2Count = int(chunks[3])
            rxDx1Count = countByRxDx[(rx, dx1)]
            rxDx2Count = countByRxDx[(rx, dx2)]
            rxCount = int(chunks[4])
            dx1Count = countByDx[dx1]
            dx2Count = countByDx[dx2]
            dx1dx2Count = int(chunks[5])
            # Floating point to auto-convert float divisions later
            totalCount = float(chunks[6])

            conStats = ContingencyStats(rxDx1Dx2Count, rxCount, dx1dx2Count,
                                        totalCount)

            # Expected vs. observed Rx rates dependent on presence of Dx1Dx2 combination,
            # based on whether Rx rates per diagnosis are independent of the combination
            observed = array([
                (rxDx1Dx2Count),
                (dx1dx2Count - rxDx1Dx2Count),
                (rxCount - rxDx1Dx2Count),
                (totalCount - dx1dx2Count - rxCount + rxDx1Dx2Count),
            ])

            # Expected rates based on assumption that diagnoses occur independently of one another
            expectedRxDx1Dx2 = (rxDx1Count * dx2Count / totalCount)
            expectedRxDx2Dx1 = (rxDx2Count * dx1Count / totalCount)
            expectedDx1Dx2 = (dx1Count * dx2Count / totalCount)
            expected = array([
                (expectedRxDx1Dx2),
                (expectedDx1Dx2 - expectedRxDx1Dx2),
                (rxCount - expectedRxDx1Dx2),
                (totalCount - expectedDx1Dx2 - rxCount + expectedRxDx1Dx2),
            ])

            (chi2ObsExp, pChi2ObsExp) = chisquare(observed, expected)

            dataCells = [
                rx, dx1, dx2, rxDx1Dx2Count, rxDx1Count, rxDx2Count, rxCount,
                dx1dx2Count, totalCount, expectedRxDx1Dx2, expectedRxDx2Dx1,
                expectedDx1Dx2, pChi2ObsExp
            ]
            for statId in statIds:
                try:
                    dataCells.append(conStats[statId])
                except ZeroDivisionError:
                    # Statistic undefined for this contingency table
                    dataCells.append(None)

            # String conversion to allow for concatenation
            print("\t".join([str(value) for value in dataCells]))
            progress.update()
        progress.printStatus()
예제 #13
0
    ];

# Queue one argument list per (category, item prefix) combination
for categoryName in CATEGORY_LIST:
    for itemPrefix in ITEM_PREFIXES:
        specificArgv = [
            "-c", categoryName,
            "-i", itemPrefix,
            "results/queryResults.%s.%s.tab.gz" % (itemPrefix, categoryName),
        ]
        specificArgvList.append(specificArgv)

# Launch one sub-process per queued argument list, each with its own stderr log file
prog = ProgressDots(1, 1, "Processes", total=len(specificArgvList))
for specificArgv in specificArgvList:
    key = "%.3d.%s" % (prog.getCounts(), "_".join(specificArgv))
    # Don't want to use directory separator in temp log file name
    key = key.replace("/", "..")
    argv = list(baseArgv) + list(specificArgv)
    log.info("Starting: " + " ".join(argv))
    logFile = stdOpen(LOG_FILE_TEMPLATE % key, "w")

    # Non-blocking subprocess.Popen to spawn parallel processes.
    # (For serial processing use the blocking form instead:
    #  subprocess.call(argv, stderr=logFile))
    process = subprocess.Popen(argv, stderr=logFile)
    log.info("Process ID: %s" % process.pid)

    # Alternative: print command lines to effectively generate a .sh script
    #print "nohup",
    #print str.join(" ", argv),
    #print "&>", LOG_FILE_TEMPLATE % key,"&"

    prog.update()
prog.printStatus()
예제 #14
0
def queryPatientEpisodes(outputFile, extractor):
    """Query emergency-department admission episodes for patients with a
    suspected-sepsis order, attach encounter details and write the episodes
    as tab-delimited text to outputFile.

    Phase 1 selects stride_adt 'Admission' events into the 'EMERGENCY
    DEPARTMENT' joined to the 'Discharge' event of the same encounter, for
    patients who have any suspected-sepsis item (IV antibiotics, blood
    cultures or respiratory viral panels) and no item for a treatment team in
    the "SurgerySpecialty" category.  Phase 2 looks up payor title and vital
    signs in stride_patient_encounter for those encounters; when an encounter
    ID occurs in several episodes, only the first one is updated.

    Parameters:
        outputFile: destination handed to TextResultsFormatter.
        extractor: passed to the item-ID loaders and used for
            loadMapData("TreatmentTeamGroups").

    Returns the list of RowItemModel episodes, ordered by ED admission time.
    """
    log.info("Select patient admissions with possible/probable sepsis within 24 hours of admission (long query >60 min?)...")

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # Clinical item category for admission diagnoses
        # NOTE(review): the value is not referenced below; the lookup is kept so
        # behavior when the category is missing stays unchanged.
        admitDxCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0]

        # Look for items indicating suspected infection / sepsis
        ivAntibioticItemIds = loadIVAntibioticItemIds(extractor)
        bloodCultureItemIds = loadBloodCultureItemIds(extractor)
        respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor)

        # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected
        suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds))
        suspectSepsisItemIdsStr = ",".join([str(itemId) for itemId in suspectSepsisItemIds])

        # Look for primary surgery teams to exclude
        excludeTeamCategory = "SurgerySpecialty"
        excludeTreatmentTeams = [
            teamRow["treatment_team"]
            for teamRow in extractor.loadMapData("TreatmentTeamGroups")
            if teamRow["team_category"] == excludeTeamCategory
        ]
        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addFrom("clinical_item")
        query.addWhereIn("description", excludeTreatmentTeams)
        excludeTeamItemIds = set()
        for itemRow in DBUtil.execute(query, conn=conn):
            excludeTeamItemIds.add(itemRow[0])
        excludeTeamItemIdsStr = ",".join([str(itemId) for itemId in excludeTeamItemIds])

        # First pass query to get the list of patients and emergency department presentation times.
        # The ID lists are spliced into the SQL text; they come from the loaders / database above.
        cohortQuery = \
        """
        --  Pick out date(s) when admitted through emergency department and matching discharge time
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            -- Admission event
            adt1.department_in = 'EMERGENCY DEPARTMENT' and
            adt1.event_in = 'Admission' and
            adt1.pat_anon_id in
            (    -- Select any patient with any suspected sepsis related order (i.e., IV antibiotics or blood cultures)
                select patient_id
                from patient_item as pi
                where pi.clinical_item_id in (%s)
                except
                -- Exclude any patient who has been on a primary surgery team
                select patient_id
                from patient_item
                where clinical_item_id in (%s)
                -- -12434586418575,-12432455207729,-12428492282572,-12428492282572,-12424048595257,-12414081679705
            ) and
            
            adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id and
            
            -- Discharge event
            adt2.event_out = 'Discharge'
            
        order by adt1.shifted_transf_in_dt_tm
        """ % (suspectSepsisItemIdsStr, excludeTeamItemIdsStr)
        # Echo the query to stderr; write() behaves the same on Python 2 and 3,
        # unlike the Python-2-only "print >>" statement
        sys.stderr.write(cohortQuery + "\n")
        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Build the basic episode records: patient ID, encounter ID,
        #   ED presentation time and discharge time
        prog = ProgressDots()
        for cohortRow in iter(cursor.fetchone, None):
            (patientId, encounterId, edAdmitTime, dischargeTime) = cohortRow
            patientEpisode = RowItemModel({
                "patient_id": patientId,
                "edAdmitTime": edAdmitTime,
                "dischargeTime": dischargeTime,
                "encounter_id": encounterId,
                # Default encounter data to null in case can't find it later
                "payorTitle": None,
                "bpSystolic": None,
                "bpDiastolic": None,
                "temperature": None,
                "pulse": None,
                "respirations": None,
            })
            patientEpisodes.append(patientEpisode)
            # First episode seen for an encounter is the one indexed
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[patientEpisode["encounter_id"]] = patientEpisode

            prog.update()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        query = SQLQuery()
        for columnName in ("pat_id", "pat_enc_csn_id", "title", "bp_systolic",
                           "bp_diastolic", "temperature", "pulse", "respirations"):
            query.addSelect(columnName)
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)
        for encounterRow in iter(cursor.fetchone, None):
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
             temperature, pulse, respirations) = encounterRow
            if encounterId not in patientEpisodeById:
                continue
            patientEpisode = patientEpisodeById[encounterId]
            if patientEpisode["payorTitle"] is None:
                # Single encounters may have multiple payors to track
                patientEpisode["payorTitle"] = set()
            patientEpisode["payorTitle"].add(payorTitle)
            patientEpisode["bpSystolic"] = bpSystolic
            patientEpisode["bpDiastolic"] = bpDiastolic
            patientEpisode["temperature"] = temperature
            patientEpisode["pulse"] = pulse
            patientEpisode["respirations"] = respirations

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
예제 #15
0
def main(argv):
    """Print a tab-delimited table of Rx-Dx association statistics.

    For each named opioid group (a fixed set of medication IDs) and each
    diagnosis group found in stride_problem_list before LIMIT_DATE, one row
    is printed to stdout with the co-occurrence patient count, the marginal
    counts, the total patient count and the statistics listed in statIds.
    When DOUBLE_DX is set, diagnosis groups are pairs of diagnoses and an
    extra "Dx2" column is emitted.

    The header row is printed exactly once, before any data rows.

    Args:
        argv: Command-line arguments. Not read by this function.
    """
    # Medication IDs grouped by active ingredient / formulation
    medIdsByActiveRx = {
        'Buprenorphine': ('125498', '114474', '212560', '114475', '114467',
                          '114468'),
        'Fentanyl Patch': ('2680', '27908', '125379', '27905', '27906',
                           '540107', '540638', '540101', '27907'),
        'Methadone': ('540483', '4953', '4951', '10546', '214468', '15996',
                      '41938', '4954', '4952'),
        'Hydrocodone': ('3724', '4579', '8576', '8577', '8951', '10204',
                        '12543', '13040', '14963', '14965', '14966', '17061',
                        '17927', '19895', '20031', '28384', '29486', '29487',
                        '34505', '34544', '35613', '117862', '204249',
                        '206739'),
        'Hydromorphone': ('2458', '2459', '2464', '2465', '3757', '3758',
                          '3759', '3760', '3761', '10224', '10225', '10226',
                          '10227', '200439', '201094', '201096', '201098',
                          '540125', '540179', '540666'),
        'Morphine': ('5167', '5168', '5172', '5173', '5176', '5177', '5178',
                     '5179', '5180', '5183', '6977', '10655', '15852',
                     '20908', '20909', '20910', '20914', '20915', '20919',
                     '20920', '20921', '20922', '29464', '30138', '31413',
                     '36140', '36141', '79691', '87820', '89282', '91497',
                     '95244', '96810', '112562', '112564', '115335', '115336',
                     '126132', '198543', '198544', '198623', '201842',
                     '201848', '205011', '206731', '207949', '208896',
                     '540182', '540300'),
        'Oxycodone': ('5940', '5941', '6122', '6981', '10812', '10813',
                      '10814', '14919', '16121', '16123', '16129', '16130',
                      '19187', '26637', '26638', '27920', '27921', '27922',
                      '27923', '28897', '28899', '28900', '31851', '31852',
                      '31863', '31864', '92248', '126939', '200451', '203690',
                      '203691', '203692', '203705', '203706', '203707',
                      '204020', '204021'),
    }

    # Statistics pulled from ContingencyStats for every output row
    statIds = (
        "P-Fisher",
        "P-YatesChi2",
        "oddsRatio",
        "relativeRisk",
        "interest",
        "LR+",
        "LR-",
        "sensitivity",
        "specificity",
        "PPV",
        "NPV",
    )

    # Denominator for all rates; float so later divisions are not truncated.
    # NOTE(review): presumably baseQuery() selects a single patient count
    # from a table aliased "med" - confirm against its definition.
    totalPatients = float(DBUtil.execute(baseQuery())[0][0])

    # Baseline patient count per diagnosis group (or diagnosis pair)
    query = SQLQuery()
    query.addSelect("count(distinct prob.pat_id) as ptCount")
    _addDxGroupClauses(query)
    patientsPerDxGroup = dict()
    for row in DBUtil.execute(query):
        patientsPerDxGroup[_dxGroupFromRow(row)] = row[0]

    # Print the header once up front. Gating it on the progress counter
    # repeated the header whenever an earlier Rx group produced no rows.
    headerCols = ["Rx", "Dx", "RxDxCount", "RxCount", "DxCount", "Total"]
    if DOUBLE_DX:
        headerCols.insert(2, "Dx2")
    headerCols.extend(statIds)
    print("\t".join(headerCols))

    progress = ProgressDots()
    for activeRx, medIds in medIdsByActiveRx.items():
        query = baseQuery()
        query.addWhereIn("medication_id", medIds)

        # Baseline prescription count for this Rx group
        rxPtCount = DBUtil.execute(query)[0][0]

        # Extend the same query to count per diagnosis group in one
        # aggregate grouped query rather than one query per diagnosis
        _addDxGroupClauses(query, joinToMed=True)
        for row in DBUtil.execute(query):
            rxDxPtCount = row[0]
            dxGroup = _dxGroupFromRow(row)
            dxPtCount = patientsPerDxGroup[dxGroup]

            conStats = ContingencyStats(rxDxPtCount, rxPtCount, dxPtCount,
                                        totalPatients)

            dataCells = [activeRx]
            if DOUBLE_DX:
                # Composite key is split back into separate Dx / Dx2 columns
                dataCells.extend(dxGroup)
            else:
                dataCells.append(dxGroup)
            dataCells.extend(
                [rxDxPtCount, rxPtCount, dxPtCount, totalPatients])

            for statId in statIds:
                try:
                    dataCells.append(conStats[statId])
                except ZeroDivisionError:
                    # Statistic undefined for this table; emit "None"
                    dataCells.append(None)

            print("\t".join(str(value) for value in dataCells))
            progress.update()
    progress.printStatus()


def _addDxGroupClauses(query, joinToMed=False):
    """Append diagnosis-group select/from/where/group-by clauses to query.

    Args:
        query: Query object to extend in place.
        joinToMed: When True, also join the problem list to the "med" alias
            on pat_id (the query must already define that alias).
    """
    query.addSelect("prob.%s" % DX_COL)
    query.addFrom("stride_problem_list as prob")
    if joinToMed:
        query.addWhere("med.pat_id = prob.pat_id")
    query.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
    query.addGroupBy("prob.%s" % DX_COL)
    if DOUBLE_DX:
        # Self-join to group by pairs of diagnoses for the same patient
        query.addSelect("prob2.%s" % DX_COL)
        query.addFrom("stride_problem_list as prob2")
        query.addWhere("prob.pat_id = prob2.pat_id")
        query.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
        query.addGroupBy("prob2.%s" % DX_COL)


def _dxGroupFromRow(row):
    """Return the diagnosis-group key of a result row (count is column 0).

    The key is the single diagnosis value, or a (dx, dx2) tuple when
    DOUBLE_DX is set.
    """
    if DOUBLE_DX:
        return (row[1], row[2])
    return row[1]