Esempio n. 1
0
    def _get_average_orders_per_patient(self):
        """Return the median number of lab orders per patient.

        Builds a per-patient ``COUNT`` query for the dataset selected by
        ``LocalEnv.DATASET_SOURCE_NAME`` ('STRIDE' or 'UMich'), restricted to
        the components returned by ``self._get_components_in_lab_panel()``,
        and runs it through ``DBUtil.execute``.

        Despite the method name, the value returned is ``numpy.median`` of
        the per-patient counts, not their mean.

        Returns:
            The median of the ``num_orders`` column over all returned rows.

        Raises:
            Exception: if the dataset source is neither 'STRIDE' nor
                'UMich', or if the query returns no rows.
        """
        query = SQLQuery()
        source = LocalEnv.DATASET_SOURCE_NAME

        # Source-specific part: which tables to read and how the lab is
        # identified in them.
        if source == 'STRIDE':  # TODO: add STRIDE component routine
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            query.addWhereIn("proc_code", [self._lab_panel])
        elif source == 'UMich':
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addSelect('COUNT(order_proc_id) AS num_orders')
            query.addFrom('labs')
            query.addWhereIn(self._varTypeInTable, [self._lab_var])
        else:
            # Fail fast instead of sending an empty query to the database.
            error_msg = 'Unsupported dataset source "%s".' % source
            log.critical(error_msg)
            raise Exception(error_msg)

        # Shared part: restrict to the panel's components, one row per patient.
        components = self._get_components_in_lab_panel()
        query.addWhereIn("base_name", components)
        query.addGroupBy('pat_id')

        log.debug('Querying median orders per patient...')
        results = DBUtil.execute(query)

        # Second selected column is num_orders.
        order_counts = [row[1] for row in results]
        if not order_counts:
            error_msg = '0 orders for lab "%s."' % self._lab_var
            log.critical(error_msg)
            # sxu: raise rather than sys.exit so callers can catch the failure.
            raise Exception(error_msg)

        return numpy.median(order_counts)
Esempio n. 2
0
    def _get_random_patient_list(self):
        """Return a random sample of patient IDs having results for the component.

        Queries the number of orders per patient for ``self._component``,
        takes the median of those counts, and derives how many patients are
        needed to reach ``self._num_requested_episodes``. That many patients
        (capped at the number of rows returned) are then drawn without
        replacement using ``numpy.random.choice``.

        sx: the sampling is done here, client-side, to avoid RANDOM() on the
        database.

        Side effects:
            Sets ``self._num_patients`` to the computed target patient count.

        Returns:
            A list with the first column (``pat_id``) of each sampled row.

        Raises:
            SystemExit: via ``sys.exit`` when the query returns no rows.
        """
        # One row per patient: (pat_id, num_orders).
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhereIn("base_name", [self._component])
        query.addGroupBy('pat_id')
        log.debug('Querying median orders per patient...')

        results = DBUtil.execute(query)

        if len(results) == 0:
            error_msg = '0 orders for component "%s."' % self._component  # sx
            log.critical(error_msg)
            # NOTE(review): sys.exit raises SystemExit, which handlers written
            # as `except Exception` will not catch; kept to preserve the
            # existing contract.
            sys.exit('[ERROR] %s' % error_msg)

        order_counts = [row[1] for row in results]
        avg_orders_per_patient = numpy.median(order_counts)
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

        # Based on the median # of results, figure out how many patients we'd
        # need to get for a feature matrix of requested size (at least 1).
        self._num_patients = int(numpy.max(
            [self._num_requested_episodes / avg_orders_per_patient, 1]))

        # Some components may have fewer associated patients than the
        # required sample size.
        patient_number_chosen = min(len(results), self._num_patients)
        inds_random_patients = numpy.random.choice(
            len(results), size=patient_number_chosen, replace=False)

        return [results[ind][0] for ind in inds_random_patients]
Esempio n. 3
0
    def _get_average_orders_per_patient(self):
        """Return the median number of orders per patient for the lab panel.

        Counts, per ``pat_id``, the rows joining ``stride_order_proc`` and
        ``stride_order_results`` whose ``proc_code`` is ``self._lab_panel``
        and whose ``base_name`` is one of the components returned by
        ``self._get_components_in_lab_panel()``.

        Despite the method name, the value returned is ``numpy.median`` of
        the per-patient counts, not their mean.

        Returns:
            The median of the ``num_orders`` column over all returned rows.

        Raises:
            SystemExit: via ``sys.exit`` when the query returns no rows.
        """
        # One row per patient: (pat_id, num_orders).
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhereIn("proc_code", [self._lab_panel])
        components = self._get_components_in_lab_panel()
        query.addWhereIn("base_name", components)
        query.addGroupBy('pat_id')

        log.debug('Querying median orders per patient...')
        results = DBUtil.execute(query)

        order_counts = [row[1] for row in results]
        if not order_counts:
            error_msg = '0 orders for lab panel "%s."' % self._lab_panel
            log.critical(error_msg)
            # NOTE(review): sys.exit raises SystemExit, which handlers written
            # as `except Exception` will not catch; kept to preserve the
            # existing contract.
            sys.exit('[ERROR] %s' % error_msg)

        return numpy.median(order_counts)
Esempio n. 4
0
    def _get_random_patient_list(self):
        """Return a random sample of patient IDs that have orders for this lab.

        The per-patient order counts are queried in a way that depends on
        ``LocalEnv.DATASET_SOURCE_NAME``:

        * 'UMich': a hand-built SQL string against ``labs``, filtered on
          ``self._varTypeInTable = self._lab_var`` and excluding
          ``self.notUsePatIds``. The sample is drawn WITHOUT seeding numpy.
        * 'STRIDE': ``stride_order_proc`` joined to ``stride_order_results``,
          filtered by panel (``proc_code`` + ``self._lab_components``, with
          optional ``self._time_limit`` bounds) or by single component
          (``base_name``), depending on ``self._isLabPanel``.
        * anything else: the ``labs`` table, with the same panel/component
          distinction.

        For the two non-UMich cases, patients in ``self._notUsePatIds`` are
        excluded and numpy's global RNG is seeded from ``self._random_state``
        before sampling.

        In every case the median order count per patient is used to derive
        how many patients are needed to reach
        ``self._num_requested_episodes``; that many (capped at the number of
        rows returned) are drawn without replacement.

        Side effects:
            Sets ``self._num_patients``; on the non-UMich paths, reseeds
            ``numpy.random``.

        Returns:
            A list with the first column (``pat_id``) of each sampled row.

        Raises:
            Exception: if the query returns no rows.
        """
        source = LocalEnv.DATASET_SOURCE_NAME

        if source == 'UMich':
            # TODO: best way to integrate UMich code
            clauses = [
                "SELECT CAST(pat_id AS BIGINT) AS pat_id , ",
                "COUNT(order_proc_id) AS num_orders ",
                "FROM labs ",
                "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var),
            ]
            # NOTE(review): this branch reads self.notUsePatIds while the
            # non-UMich path below reads self._notUsePatIds -- confirm that
            # both attributes exist on the class.
            if self.notUsePatIds:
                excluded = ",".join(
                    "%s" % pat_id for pat_id in self.notUsePatIds)
                clauses.append("AND pat_id NOT IN (%s) " % excluded)
            clauses.append("GROUP BY pat_id")
            query_str = "".join(clauses)

            log.debug('Querying median orders per patient...')
            results = DBUtil.execute(query_str)

            if len(results) == 0:
                error_msg = '0 orders for order "%s."' % self._lab_var  # sx
                log.critical(error_msg)
                # sxu: raise rather than sys.exit so callers can catch it.
                raise Exception(error_msg)

            order_counts = [row[1] for row in results]
            avg_orders_per_patient = numpy.median(order_counts)
            log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
            # Based on the median # of results, figure out how many patients
            # we'd need to get for a feature matrix of requested size.
            self._num_patients = int(numpy.max(
                [self._num_requested_episodes / avg_orders_per_patient, 1]))
            # Some components may have fewer associated patients than the
            # required sample size.
            patient_number_chosen = min(len(results), self._num_patients)
            # NOTE(review): unlike the non-UMich path, no seed is set here,
            # so this sample is not reproducible across runs.
            inds_random_patients = numpy.random.choice(
                len(results), size=patient_number_chosen, replace=False)
            return [results[ind][0] for ind in inds_random_patients]

        # One row per patient: (pat_id, num_orders).
        query = SQLQuery()
        query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')

        if source == 'STRIDE':
            if self._isLabPanel:
                query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
                query.addFrom('stride_order_proc AS sop')
                query.addFrom('stride_order_results AS sor')

                # Optional (lower, upper) bounds on the order time; either
                # end may be falsy to leave that side open.
                if self._time_limit:
                    if self._time_limit[0]:
                        query.addWhere("sop.order_time > '%s'" %
                                       self._time_limit[0])
                    if self._time_limit[1]:
                        query.addWhere("sop.order_time < '%s'" %
                                       self._time_limit[1])

                query.addWhere('sop.order_proc_id = sor.order_proc_id')
                query.addWhereIn('proc_code', [self._lab_var])
                # sbala: Technically it's possible for someone to get a lab
                # ordered without getting results.
                query.addWhereIn("base_name", self._lab_components)
            else:
                query.addSelect('COUNT(sor.order_proc_id) AS num_orders')
                query.addFrom('stride_order_proc AS sop')
                query.addFrom('stride_order_results AS sor')
                query.addWhere('sop.order_proc_id = sor.order_proc_id')
                query.addWhereIn("base_name", [self._lab_var])
        else:
            query.addSelect('COUNT(order_proc_id) AS num_orders')
            query.addFrom('labs')
            if self._isLabPanel:
                query.addWhereIn("proc_code", [self._lab_var])
                query.addWhereIn("base_name", self._lab_components)
            else:
                query.addWhereIn("base_name", [self._lab_var])

        # For the hold-out set, do not use the patients already used in
        # training/validation.
        if self._notUsePatIds:
            query.addWhereNotIn('pat_id', self._notUsePatIds)

        query.addGroupBy('pat_id')

        log.debug('Querying the number of orders per patient...')
        results = DBUtil.execute(query)

        if len(results) == 0:
            error_msg = '0 orders for component "%s."' % self._lab_var  # sx
            log.critical(error_msg)
            # sxu: raise rather than sys.exit so callers can catch it.
            raise Exception(error_msg)

        order_counts = [row[1] for row in results]
        avg_orders_per_patient = numpy.median(order_counts)
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
        # Based on the median # of results, figure out how many patients we'd
        # need to get for a feature matrix of requested size.
        self._num_patients = int(numpy.max(
            [self._num_requested_episodes / avg_orders_per_patient, 1]))

        # Some components may have fewer associated patients than the
        # required sample size.
        patient_number_chosen = min(len(results), self._num_patients)

        # Set seed to ensure reproducibility of patient episodes.
        # Recover int random_state here, since numpy requires int while sql
        # requires [-1,1].
        numpy.random.seed(int(self._random_state * float(sys.maxsize)))
        inds_random_patients = numpy.random.choice(
            len(results), size=patient_number_chosen, replace=False)

        return [results[ind][0] for ind in inds_random_patients]
Esempio n. 5
0
    def _get_random_patient_list(self):
        """Return a random list of patient IDs that have orders for this lab.

        Behaviour depends on ``LocalEnv.DATASET_SOURCE_NAME``:

        * 'STRIDE': the median orders per patient comes from
          ``self._get_average_orders_per_patient()``; the database then picks
          the patients via ``ORDER BY RANDOM()`` with a ``LIMIT`` of
          ``self._num_patients``.
        * 'UMich': per-patient counts are fetched with a hand-built SQL
          string against ``labs`` (excluding ``self.notUsePatIds``), and the
          patients are sampled client-side with ``numpy.random.choice``,
          without replacement.
        * any other source: nothing is queried and ``None`` is returned.

        Side effects:
            Sets ``self._num_patients`` on the STRIDE and UMich paths.

        Returns:
            A list of ``pat_id`` values, or ``None`` for an unrecognised
            dataset source.

        Raises:
            Exception: on the UMich path, if the query returns no rows.
        """
        source = LocalEnv.DATASET_SOURCE_NAME

        if source == 'STRIDE':
            # Get median number of results for this lab test per patient.
            avg_orders_per_patient = self._get_average_orders_per_patient()
            log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
            # Based on that figure, work out how many patients we'd need to
            # get for a feature matrix of requested size (at least 1).
            self._num_patients = int(numpy.max(
                [self._num_requested_episodes / avg_orders_per_patient, 1]))

            # Get self._num_patients random patients who have gotten test.
            # TODO(sbala): Have option to feed in a seed for the randomness.
            # NOTE(review): the query has no DISTINCT / GROUP BY, so a
            # patient with several matching orders may appear more than once.
            query = SQLQuery()
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addFrom('stride_order_proc AS sop')
            query.addWhereIn('proc_code', [self._lab_var])  # TODO: components
            query.addOrderBy('RANDOM()')
            query.setLimit(self._num_patients)
            log.debug('Querying random patient list...')
            results = DBUtil.execute(query)

            return [row[0] for row in results]

        if source == 'UMich':
            # One row per patient: (pat_id, num_orders).
            clauses = [
                "SELECT CAST(pat_id AS BIGINT) AS pat_id , ",
                "COUNT(order_proc_id) AS num_orders ",
                "FROM labs ",
                "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var),
            ]
            if self.notUsePatIds:
                excluded = ",".join(
                    "%s" % pat_id for pat_id in self.notUsePatIds)
                clauses.append("AND pat_id NOT IN (%s) " % excluded)
            clauses.append("GROUP BY pat_id")
            query_str = "".join(clauses)

            log.debug('Querying median orders per patient...')
            results = DBUtil.execute(query_str)

            if len(results) == 0:
                error_msg = '0 orders for order "%s."' % self._lab_var  # sx
                log.critical(error_msg)
                # sxu: raise rather than sys.exit so callers can catch it.
                raise Exception(error_msg)

            order_counts = [row[1] for row in results]
            avg_orders_per_patient = numpy.median(order_counts)
            log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
            # Based on the median # of results, figure out how many patients
            # we'd need to get for a feature matrix of requested size.
            self._num_patients = int(numpy.max(
                [self._num_requested_episodes / avg_orders_per_patient, 1]))
            # Some components may have fewer associated patients than the
            # required sample size.
            patient_number_chosen = min(len(results), self._num_patients)
            inds_random_patients = numpy.random.choice(
                len(results), size=patient_number_chosen, replace=False)
            return [results[ind][0] for ind in inds_random_patients]

        # NOTE(review): any other dataset source yields None, as before --
        # confirm callers handle that.
        return None