Example No. 1
    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")

        DBUtil.execute \
            ("""delete from patient_item 
            where clinical_item_id in 
            (   select clinical_item_id
                from clinical_item as ci, clinical_item_category as cic
                where ci.clinical_item_category_id = cic.clinical_item_category_id
                and cic.source_table = '%s'
            )
            """ % TEST_SOURCE_TABLE
             )
        DBUtil.execute \
            ("""delete from clinical_item 
            where clinical_item_category_id in 
            (   select clinical_item_category_id 
                from clinical_item_category 
                where source_table = '%s'
            )
            """ % TEST_SOURCE_TABLE
             )
        DBUtil.execute("delete from clinical_item_category where source_table = '%s';" % TEST_SOURCE_TABLE)

        bqCursor = self.bqConn.cursor()
        bqCursor.execute('DELETE FROM %s.patient_item WHERE true;' % TEST_DEST_DATASET)
        bqCursor.execute('DELETE FROM %s.clinical_item WHERE true;' % TEST_DEST_DATASET)
        bqCursor.execute('DELETE FROM %s.clinical_item_category WHERE true;' % TEST_DEST_DATASET)

        DBTestCase.tearDown(self)
Example No. 2
    def dumpPatientItemToCsv(self, tempDir, batchCounter=999):
        log.info(
            'Dumping patient_item for batch {} to CSV'.format(batchCounter))

        DBUtil.dumpTableToCsv(
            'patient_item',
            '{}/{}_patient_item.csv'.format(tempDir, batchCounter))
Example No. 3
    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Sourcing from BigQuery DB")

        self.patientIds = [
            'JCd5ef6e', 'JCce317d', 'JCe83f82', 'JCe5fc81', 'JCdb8fe4',
            'JCcdc6a0', 'JCd37637', 'JCdbb57e', 'JCcebdef', 'JCcc41b3',
            'JCe160b3', 'JCe8415d', 'JCdb1735', 'JCeb8fe9', 'JCe362b1',
            'JCcca36e', 'JCddddf4', 'JCe683c1', 'JCe74388', 'JCd30ac4',
            'JCd1bb22', 'JCe3397c', 'JCccb16c', 'JCd5da6d', 'JCd6f915',
            'JCe3e96d', 'JCd43db0', 'JCe5a52f', 'JCd9f7b5', 'JCd60bb3',
            'JCe66004', 'JCe4a6c2', 'JCceb239', 'JCda9846', 'JCce3176',
            'JCe098ca', 'JCd31af1', 'JCe796fd', 'JCcc9243', 'JCd05308',
            'JCea3982', 'JCd99619', 'JCd99366', 'JCdb087f', 'JCd9f2b3',
            'JCe8a2d4', 'JCd19201', 'JCcdc146', 'JCe05414', 'JCd98ef5'
        ]

        self.pat_id_csv = '/tmp/tmp_test_pat_id.csv'
        with open(self.pat_id_csv, 'w') as f:
            for id in ['rit_uid'] + self.patientIds:
                f.write("%s\n" % id)

        self.bqConn = bigQueryUtil.connection()
        self.converter = STARRDemographicsConversion()  # Instance to test on
Example No. 4
    def convertSourceItems(self, convOptions, conn=None):
        """Primary run function to process the contents of the raw source
        table and convert them into equivalent patient_item, clinical_item, and clinical_item_category entries.
        Should look for redundancies after the fact to catch repeated conversions.

        startDate - If provided, only return items whose ordering_date is on or after that date.
        endDate - If provided, only return items whose ordering_date is before that date.
        """
        log.info("Conversion for items dated %s to %s" %
                 (convOptions.startDate, convOptions.endDate))
        progress = ProgressDots()

        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        try:
            # Next round for medications directly from order_med table not addressed in medmix  TODO (nodir) seems like an unrelated comment?
            category = self.categoryFromSourceItem(conn)
            for sourceItem in self.querySourceItems(convOptions):
                log.debug('sourceItem: {}'.format(sourceItem))
                self.convertSourceItem(category, sourceItem, conn=conn)
                progress.Update()

        finally:
            conn.close()

        progress.PrintStatus()
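A minimal driver sketch for the method above, assuming an options holder with startDate/endDate attributes (ConversionOptions appears in Example No. 28) and a hypothetical converter subclass; names marked as such are not taken from this listing:

# Hypothetical driver; assumes a converter class exposing the convertSourceItems()
# defined above and an options object carrying startDate/endDate.
from datetime import datetime

conv_options = ConversionOptions()               # options container, as in Example No. 28
conv_options.startDate = datetime(2011, 12, 15)  # only items dated on or after this
conv_options.endDate = datetime(2012, 12, 15)    # only items dated before this

converter = SomeSourceItemConversion()           # hypothetical subclass implementing the method
converter.convertSourceItems(conv_options)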
Example No. 5
    def create_new_table_from_schema(self, dataset_id, table_id, schema):
        '''
        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''

        dataset_ref = self.client.dataset(dataset_id)
        table_ref = dataset_ref.table(table_id)

        try:
            self.client.get_table(table_ref)
            #print(f'Table {table_id} in dataset {dataset_id} already exists! Skipping create operation.')
        except NotFound:
            # Construct a full Table object to send to the API.
            table = bigquery.Table(table_ref, schema=schema)
            table = self.client.create_table(table)  # API request
            log.info('''
                    Table {} in dataset {}
                    created successfully project: {}.
                    '''.format(table.table_id, dataset_id,
                               self.client.project))
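A hedged usage sketch for the helper above; bq_util stands in for an instance of whatever class defines create_new_table_from_schema(), and the dataset/table names are placeholders:

from google.cloud import bigquery

# bq_util is assumed to be an instance of the class above, with .client set to an
# authenticated bigquery.Client(); the dataset and table names below are invented.
schema = [
    bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='patient name'),
    bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
]
bq_util.create_new_table_from_schema('my_dataset', 'my_table', schema)  # no-op if the table already exists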
Example No. 6
    def uploadClinicalTablesCsvToBQ(self, tempDir, datasetId):
        log.info('Uploading clinical_item_category CSV to BQ dataset %s' % datasetId)
        clinical_item_category_schema = [
            bigquery.SchemaField('clinical_item_category_id', 'INT64', 'REQUIRED', None, ()),
            bigquery.SchemaField('source_table', 'STRING', 'REQUIRED', None, ()),
            bigquery.SchemaField('description', 'STRING', 'NULLABLE', None, ()),
            bigquery.SchemaField('default_recommend', 'INT64', 'NULLABLE', None, ())]

        clinical_item_category_csv_path = tempDir + '/clinical_item_category.csv'

        bigQueryUtil.headerChecker(clinical_item_category_csv_path, [sf.name for sf in clinical_item_category_schema])

        self.bqClient.load_csv_to_table(datasetId, 'clinical_item_category', clinical_item_category_csv_path,
                                        skip_rows=1, append_to_table=True)
        # auto_detect_schema=False, schema=clinical_item_category_schema)

        log.info('Uploading clinical_item CSV to BQ dataset %s' % datasetId)
        clinical_item_schema = [bigquery.SchemaField('clinical_item_id', 'INT64', 'REQUIRED', None, ()),
                                bigquery.SchemaField('clinical_item_category_id', 'INT64', 'REQUIRED', None, ()),
                                bigquery.SchemaField('external_id', 'INT64', 'NULLABLE', None, ()),
                                bigquery.SchemaField('name', 'STRING', 'REQUIRED', None, ()),
                                bigquery.SchemaField('description', 'STRING', 'NULLABLE', None, ()),
                                bigquery.SchemaField('default_recommend', 'INT64', 'NULLABLE', None, ()),
                                bigquery.SchemaField('item_count', 'FLOAT64', 'NULLABLE', None, ()),
                                bigquery.SchemaField('patient_count', 'FLOAT64', 'NULLABLE', None, ()),
                                bigquery.SchemaField('encounter_count', 'FLOAT64', 'NULLABLE', None, ()),
                                bigquery.SchemaField('analysis_status', 'INT64', 'NULLABLE', None, ()),
                                bigquery.SchemaField('outcome_interest', 'INT64', 'NULLABLE', None, ())]

        clinical_item_csv_path = tempDir + '/clinical_item.csv'

        bigQueryUtil.headerChecker(clinical_item_csv_path, [sf.name for sf in clinical_item_schema])

        self.bqClient.load_csv_to_table(datasetId, 'clinical_item', clinical_item_csv_path,
                                        skip_rows=1, append_to_table=True)
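For orientation, a rough equivalent of the wrapper's load_csv_to_table() call using the raw google-cloud-bigquery client; this is a sketch under the assumption that the wrapper performs a standard CSV load job, not a transcription of its actual implementation:

from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,                                        # mirrors skip_rows=1 above
    schema=clinical_item_category_schema,
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,   # mirrors append_to_table=True
)
with open(clinical_item_category_csv_path, 'rb') as csv_file:
    load_job = client.load_table_from_file(
        csv_file,
        'my_project.my_dataset.clinical_item_category',         # hypothetical fully-qualified table ID
        job_config=job_config)
    load_job.result()  # block until the load job finishes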
Example No. 7
    def dumpItemCollectionTablesToCsv(self, tempDir):
        log.info('Dumping item_collection_item and item_collection to CSV')

        DBUtil.dumpTableToCsv('item_collection_item',
                              '{}/item_collection_item.csv'.format(tempDir))
        DBUtil.dumpTableToCsv('item_collection',
                              '{}/item_collection.csv'.format(tempDir))
Example No. 8
 def patientItemFromSourceItem(self, sourceItem, clinicalItem, conn):
     # Produce a patient_item record model for the given sourceItem
     patientItem = \
         RowItemModel \
             ({"external_id": int(sourceItem["prov_map_id"][2:], 16),
               "patient_id": int(sourceItem["rit_uid"][2:], 16),
               "encounter_id": sourceItem["pat_enc_csn_id_coded"],
               "clinical_item_id": clinicalItem["clinical_item_id"],
               "item_date": sourceItem["trtmnt_tm_begin_dt_jittered"],
               }
              )
     insertQuery = DBUtil.buildInsertQuery("patient_item",
                                           patientItem.keys())
     insertParams = patientItem.values()
     try:
         # Optimistic insert of a new unique item
         DBUtil.execute(insertQuery, insertParams, conn=conn)
         patientItem["patient_item_id"] = DBUtil.execute(
             DBUtil.identityQuery("patient_item"), conn=conn)[0][0]
     except conn.IntegrityError as err:
         # If turns out to be a duplicate, okay, pull out existing ID and continue to insert whatever else is possible
         log.info(
             err
         )  # Lookup just by the composite key components to avoid attempting duplicate insertion again
         searchPatientItem = \
             {"patient_id": patientItem["patient_id"],
              "clinical_item_id": patientItem["clinical_item_id"],
              "item_date": patientItem["item_date"],
              }
         (patientItem["patient_item_id"],
          isNew) = DBUtil.findOrInsertItem("patient_item",
                                           searchPatientItem,
                                           conn=conn)
Example No. 9
    def convertSourceItems(self, convOptions):
        """Primary run function to process the contents of the order_med
        table and convert them into equivalent patient_item, clinical_item, and clinical_item_category entries.
        Should look for redundancies after the fact to catch repeated conversions.

        startDate - If provided, only return items whose order_time_jittered is on or after that date.
        endDate - If provided, only return items whose order_time_jittered is before that date.
        """
        log.info("Conversion for items dated {} to {}".format(
            convOptions.startDate, convOptions.endDate))
        progress = ProgressDots()
        conn = self.connFactory.connection()
        try:
            # Load up the medication mapping table to facilitate subsequent conversions
            rxcuiDataByMedId = self.loadRXCUIData()

            # Next round for medications directly from order_med table not addressed in medmix
            for sourceItem in self.querySourceItems(rxcuiDataByMedId,
                                                    convOptions,
                                                    progress=progress,
                                                    conn=conn):
                self.convertSourceItem(sourceItem, conn=conn)
                progress.Update()

        finally:
            conn.close()
        progress.PrintStatus()
Example No. 10
    def dumpClinicalTablesToCsv(self, tempDir):
        log.info('Dumping clinical_item and clinical_item_category to CSV')

        DBUtil.dumpTableToCsv('clinical_item',
                              '{}/clinical_item.csv'.format(tempDir))
        DBUtil.dumpTableToCsv('clinical_item_category',
                              '{}/clinical_item_category.csv'.format(tempDir))
Example No. 11
    def uploadClinicalTablesCsvToBQ(self, tempDir, datasetId):
        log.info('Uploading clinical_item_category CSV to BQ dataset %s' % datasetId)
        clinical_item_category_schema = self.bqClient.client.get_table(
                self.bqClient.client.dataset('clinical_item2018', 'mining-clinical-decisions')
                    .table('clinical_item_category')
        ).schema

        clinical_item_category_csv_path = tempDir + '/clinical_item_category.csv'

        bigQueryUtil.headerChecker(clinical_item_category_csv_path, [sf.name for sf in clinical_item_category_schema])

        self.bqClient.load_csv_to_table(datasetId, 'clinical_item_category', clinical_item_category_csv_path,
                                        skip_rows=1, append_to_table=True)
        # auto_detect_schema=False, schema=clinical_item_category_schema)

        log.info('Uploading clinical_item CSV to BQ dataset %s' % datasetId)
        clinical_item_schema = self.bqClient.client.get_table(
                self.bqClient.client.dataset('clinical_item2018', 'mining-clinical-decisions').table('clinical_item')
        ).schema

        clinical_item_csv_path = tempDir + '/clinical_item.csv'

        bigQueryUtil.headerChecker(clinical_item_csv_path, [sf.name for sf in clinical_item_schema])

        self.bqClient.load_csv_to_table(datasetId, 'clinical_item', clinical_item_csv_path,
                                        skip_rows=1, append_to_table=True)
Example No. 12
    def uploadPatientItemCsvToBQ(self, tempDir, datasetId, batchCounter=999):
        log.info('Uploading patient_item CSV to BQ dataset %s for batch %s' %
                 (datasetId, batchCounter))
        patient_item_schema = [
            bigquery.SchemaField('patient_item_id', 'INT64', 'REQUIRED', None,
                                 ()),
            bigquery.SchemaField('external_id', 'INT64', 'NULLABLE', None, ()),
            bigquery.SchemaField('patient_id', 'INT64', 'REQUIRED', None, ()),
            bigquery.SchemaField('clinical_item_id', 'INT64', 'REQUIRED', None,
                                 ()),
            bigquery.SchemaField('item_date', 'TIMESTAMP', 'REQUIRED', None,
                                 ()),
            bigquery.SchemaField('analyze_date', 'TIMESTAMP', 'NULLABLE', None,
                                 ()),
            bigquery.SchemaField('encounter_id', 'INT64', 'NULLABLE', None,
                                 ()),
            bigquery.SchemaField('text_value', 'STRING', 'NULLABLE', None, ()),
            bigquery.SchemaField('num_value', 'FLOAT64', 'NULLABLE', None, ()),
            bigquery.SchemaField('source_id', 'INT64', 'NULLABLE', None, ()),
            bigquery.SchemaField('item_date_utc', 'TIMESTAMP', 'NULLABLE',
                                 None, ())
        ]

        csv_path = tempDir + os.path.sep + str(
            batchCounter) + '_patient_item.csv'

        bigQueryUtil.headerChecker(csv_path,
                                   [sf.name for sf in patient_item_schema])

        self.bqClient.load_csv_to_table(datasetId,
                                        'patient_item',
                                        csv_path,
                                        schema=patient_item_schema,
                                        skip_rows=1,
                                        append_to_table=True)
Example No. 13
 def removePatientItemCsv(self, tempDir, batchCounter):
     log.info('Removing patient_item CSV for batch %s' % batchCounter)
     if os.path.exists(tempDir + '/' + str(batchCounter) +
                       '_patient_item.csv'):
         os.remove(tempDir + '/' + str(batchCounter) + '_patient_item.csv')
     else:
         print(tempDir + '/' + str(batchCounter) +
               '_patient_item.csv does not exist')
Example No. 14
    def dumpPatientItemToCsv(self, tempDir, batchCounter):
        log.info('Dumping patient_item for batch %s to CSV' % batchCounter)

        DBUtil.execute(
            '''
            COPY patient_item TO '%s/%s_patient_item.csv' DELIMITER ',' CSV HEADER;
            ''' % (tempDir, batchCounter)
        )
Example No. 15
    def dumpPatientItemCollectionLinkToCsv(self, tempDir, batchCounter=999):
        log.info(
            'Dumping patient_item_collection_link for batch {} to CSV'.format(
                batchCounter))

        DBUtil.dumpTableToCsv(
            'patient_item_collection_link',
            '{}/{}_patient_item_collection_link.csv'.format(
                tempDir, batchCounter))
Example No. 16
    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        log.info("Sourcing from BigQuery DB")
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        self.converter = STARRTreatmentTeamConversion()  # Instance to test on
        self.bqConn = self.converter.bqConn
        self.starrUtil = STARRUtil.StarrCommonUtils(self.converter.bqClient)
Example No. 17
    def removeClinicalTablesCsv(self, tempDir):
        log.info('Removing clinical_item and clinical_item_category CSVs')
        if os.path.exists(tempDir + '/clinical_item.csv'):
            os.remove(tempDir + '/clinical_item.csv')
        else:
            print(tempDir + '/clinical_item.csv does not exist')

        if os.path.exists(tempDir + '/clinical_item_category.csv'):
            os.remove(tempDir + '/clinical_item_category.csv')
        else:
            print(tempDir + '/clinical_item_category.csv does not exist')
Example No. 18
    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")

        bqCursor = self.bqConn.cursor()
        bqCursor.execute('DELETE FROM %s.%s WHERE true;' % (TEST_DEST_DATASET, TEST_TABLE_ID))

        log.info("Removing tmp CSV files")
        if os.path.exists(self.tmp_csv_path):
            os.remove(self.tmp_csv_path)
        if os.path.exists(self.tmp_dummy_csv_path):
            os.remove(self.tmp_dummy_csv_path)
Example No. 19
    def uploadPatientItemCsvToBQ(self, tempDir, batchCounter, datasetId):
        log.info('Uploading patient_item CSV to BQ dataset %s for batch %s' % (datasetId, batchCounter))

        patient_item_schema = self.bqClient.client.get_table(
                self.bqClient.client.dataset('clinical_item2018', 'mining-clinical-decisions').table('patient_item')
        ).schema

        csv_path = tempDir + '/' + str(batchCounter) + '_patient_item.csv'

        bigQueryUtil.headerChecker(csv_path, [sf.name for sf in patient_item_schema])

        self.bqClient.load_csv_to_table(datasetId, 'patient_item', csv_path, skip_rows=1, append_to_table=True)
Example No. 20
    def patientItemFromSourceItem(self, sourceItem, clinicalItem, conn):
        # some prov_map_id values are NULL in starr_datalake2018
        if sourceItem["prov_map_id"] is not None:
            # prov_map_id starts with letters, we're interested only in number parts
            external_id = int(
                re.sub("[A-Z]+(\\d+)", "\\1", sourceItem["prov_map_id"]), 16)
        else:
            external_id = None

        # Produce a patient_item record model for the given sourceItem
        patientItem = RowItemModel({
            "external_id": external_id,
            "patient_id": int(sourceItem["rit_uid"][2:], 16),
            "encounter_id": sourceItem["pat_enc_csn_id_coded"],
            "clinical_item_id": clinicalItem["clinical_item_id"],
            # without str(), the time is being converted in postgres
            "item_date": str(sourceItem["trtmnt_tm_begin_dt_jittered"]),
            "item_date_utc": str(sourceItem["trtmnt_tm_begin_dt_jittered_utc"]),
        })

        insertQuery = DBUtil.buildInsertQuery("patient_item",
                                              list(patientItem.keys()))
        insertParams = list(patientItem.values())
        try:
            # Optimistic insert of a new unique item
            DBUtil.execute(insertQuery, insertParams, conn=conn)
            # Retrieve id of just inserted row
            patientItem["patient_item_id"] = DBUtil.execute(
                DBUtil.identityQuery("patient_item"), conn=conn)[0][0]
        except conn.IntegrityError as err:
            # If turns out to be a duplicate, okay, pull out existing ID and continue to insert whatever else is possible
            log.info(
                err
            )  # Lookup just by the composite key components to avoid attempting duplicate insertion again

            searchPatientItem = {
                "patient_id": patientItem["patient_id"],
                "clinical_item_id": patientItem["clinical_item_id"],
                "item_date": patientItem["item_date"],
            }
            (patientItem["patient_item_id"],
             isNew) = DBUtil.findOrInsertItem("patient_item",
                                              searchPatientItem,
                                              conn=conn)
        return patientItem
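The ID-decoding convention above (drop the alphabetic prefix, parse the remainder as hexadecimal) can be sanity-checked in isolation. A small sketch, reusing one of the test patient IDs from Example No. 3 and an invented prov_map_id value:

import re

# patient_id: strip the two-character prefix of rit_uid, parse the rest as hex
assert int("JCd5ef6e"[2:], 16) == 14020462

# external_id: keep only what follows the leading uppercase letters of prov_map_id
# ("SC1a2b3c" is an invented sample; real prov_map_id values are not shown in this listing)
prov_map_id = "SC1a2b3c"
assert int(re.sub("[A-Z]+(\\d+)", "\\1", prov_map_id), 16) == 0x1a2b3c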
Example No. 21
    def removePatientItemAddedLines(self, source_table):
        """delete added records"""
        log.info('Removing patient_item added lines in PSQL DB')

        DBUtil.execute("""delete from patient_item 
                where clinical_item_id in 
                (   select clinical_item_id
                    from clinical_item as ci, clinical_item_category as cic
                    where ci.clinical_item_category_id = cic.clinical_item_category_id
                    and cic.source_table = '{}'
                );
                """.format(source_table),
                       conn=self.pgConn)
Example No. 22
    def removePatientItemCollectionLinkAddedLines(self, source_table):
        """delete added records"""
        log.info(
            'Removing patient_item_collection_link added lines in PSQL DB')

        DBUtil.execute("""delete from patient_item_collection_link pi
               using item_collection_item ici, clinical_item ci, clinical_item_category cic
               where pi.item_collection_item_id = ici.item_collection_item_id
                 and ici.clinical_item_id = ci.clinical_item_id
                 and ci.clinical_item_category_id = cic.clinical_item_category_id
                 and cic.source_table = '{}';
                 """.format(source_table),
                       conn=self.pgConn)
Example No. 23
    def dumpClinicalTablesToCsv(self, tempDir):
        log.info('Dumping clinical_item and clinical_item_category to CSV')

        DBUtil.execute(
            '''
            COPY clinical_item TO '%s/clinical_item.csv' DELIMITER ',' CSV HEADER;
            ''' % tempDir
        )

        DBUtil.execute(
            '''
            COPY clinical_item_category TO '%s/clinical_item_category.csv' DELIMITER ',' CSV HEADER;
            ''' % tempDir
        )
Example No. 24
    def convertSourceItems(self, patientIds=None):
        """Primary run function to process the contents of the starr_datalake2018.demographic
        table and convert them into equivalent patient_item, clinical_item, and clinical_item_category entries.
        Should look for redundancies to avoid repeating conversion.

        patientIds - If provided, only process items for patient IDs matching those provided
        """
        log.info("Conversion for patients starting with: %s, %s total" % (patientIds[:5], len(patientIds)))
        progress = ProgressDots()

        with self.connFactory.connection() as conn:
            category_model = self.categoryFromSourceItem(conn)   # only 1 category - no need to have it in the loop
            for sourceItem in self.querySourceItems(patientIds, progress):
                self.convertSourceItem(category_model, sourceItem, conn)
Example No. 25
    def removeClinicalTablesAddedLines(self, source_table):
        """delete added records"""
        log.info('Removing clinical_item and clinical_item_category added lines in PSQL DB')

        DBUtil.execute \
            ("""delete from clinical_item 
                    where clinical_item_category_id in 
                    (   select clinical_item_category_id 
                        from clinical_item_category 
                        where source_table = '%s'
                    );
                    """ % source_table, conn=self.pgConn
             )
        DBUtil.execute("delete from clinical_item_category where source_table = '%s';" % source_table, conn=self.pgConn)
Example No. 26
    def test_dataConversion(self, name, aggregation):
        log.info("Generating test source data")
        self.generate_test_and_expected_data(self.TEST_DATA_SIZE,
                                             aggregate=aggregation)
        self.starrUtil.dump_test_data_to_csv(self.converter.HEADERS,
                                             self.test_data,
                                             self.test_data_csv)
        self.starrUtil.upload_csv_to_bigquery(
            'starr_datalake2018', 'treatment_team', TEST_DEST_DATASET,
            'starr_treatment_team', self.test_data_csv, self.converter.HEADERS)

        log.debug("Run the conversion process...")
        conv_options = STARRTreatmentTeamConversion.ConversionOptions()
        conv_options.aggregate = aggregation
        temp_dir = tempfile.gettempdir()
        self.converter.convertAndUpload(conv_options,
                                        tempDir=temp_dir,
                                        targetDatasetId=TEST_DEST_DATASET)

        # Just query back for the same data, de-normalizing the data back to a general table
        test_query = \
            """
            select
                pi.external_id as pi_external_id,
                pi.patient_id,
                pi.encounter_id,
                cic.description as cic_description,
                ci.external_id as ci_external_id,
                ci.name,
                ci.description as ci_description,
                pi.item_date,
                pi.item_date_utc
            from
                %s.patient_item as pi,
                %s.clinical_item as ci,
                %s.clinical_item_category as cic
            where
                pi.clinical_item_id = ci.clinical_item_id and
                ci.clinical_item_category_id = cic.clinical_item_category_id and
                cic.source_table = '%s'
            order by
                pi.external_id desc, ci.external_id desc
            """ % (TEST_DEST_DATASET, TEST_DEST_DATASET, TEST_DEST_DATASET, TEST_SOURCE_TABLE)

        bq_cursor = self.bqConn.cursor()
        bq_cursor.execute(test_query)
        actual_data = [row.values() for row in bq_cursor.fetchall()]
        log.debug('actual data: {}'.format(actual_data))
        log.debug('expected data: {}'.format(self.expected_data))
        self.assertEqualTable(self.expected_data, actual_data)
Example No. 27
    def removePatientItemAddedLines(self):
        """delete added records"""
        log.info('Removing patient_item added lines in PSQL DB')

        DBUtil.execute \
            ("""delete from patient_item 
                    where clinical_item_id in 
                    (   select clinical_item_id
                        from clinical_item as ci, clinical_item_category as cic
                        where ci.clinical_item_category_id = cic.clinical_item_category_id
                        and cic.source_table = '%s'
                    );
                    """ % SOURCE_TABLE
             )
Example No. 28
    def main(self, argv):
        """Main method, callable from command line"""
        log.setLevel(logging.FATAL)

        usage_str = "usage: %prog [options]\n"
        parser = OptionParser(usage=usage_str)
        parser.add_option(
            "-s",
            "--startDate",
            dest="startDate",
            metavar="<startDate>",
            help=
            "Date string (e.g., 2011-12-15), if provided, will only run conversion on items with ordering time on or after this date."
        )
        parser.add_option(
            "-e",
            "--endDate",
            dest="endDate",
            metavar="<endDate>",
            help=
            "Date string (e.g., 2011-12-15), if provided, will only run conversion on items with ordering time before this date."
        )
        parser.add_option(
            "-n",
            "--normalizeMixtures",
            dest="normalizeMixtures",
            action="store_true",
            help=
            "If set, when find medication mixtures, will unravel / normalize into separate entries, one for each ingredient"
        )
        parser.add_option(
            "-d",
            "--doseCountLimit",
            dest="doseCountLimit",
            help=
            "Medication orders with a finite number of doses specified less than this limit will be labeled as different items than those without a number specified, or whose number is >= to this limit. Intended to distinguish things like IV single bolus / use vs. continuous infusions and standing medication orders"
        )
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()

        conv_options = ConversionOptions()
        conv_options.extract_parser_options(options)

        self.convertAndUpload(conv_options)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
Example No. 29
    def convertSourceItems(self, patientIds=None):
        """Primary run function to process the contents of the stride_patient
        table and convert them into equivalent patient_item, clinical_item, and clinical_item_category entries.
        Should look for redundancies to avoid repeating conversion.

        patientIds - If provided, only process items for patient IDs matching those provided
        """
        log.info("Conversion for patients starting with: %s, %s total" %
                 (patientIds[:5], len(patientIds)))
        progress = ProgressDots()

        with self.connFactory.connection() as conn:
            for sourceItem in self.querySourceItems(patientIds,
                                                    progress=progress):
                self.convertSourceItem(sourceItem, conn=conn)
Example No. 30
    def setUp(self):
        """Prepare state for test cases"""
        log.setLevel(logging.INFO)  # without this, no logs are printed
        DBTestCase.setUp(self)

        log.info("Sourcing from BigQuery DB")
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        self.converter = STARRTreatmentTeamConversion.STARRTreatmentTeamConversion(
        )  # Instance to test on
        self.bqConn = self.converter.bqConn
        self.starrUtil = STARRUtil.StarrCommonUtils(self.converter.bqClient)

        # point the converter to dummy source table
        STARRTreatmentTeamConversion.SOURCE_TABLE = TEST_SOURCE_TABLE