def convertAndUpload(self, convOptions, tempDir=tempfile.gettempdir(), removeCsvs=True, targetDatasetId='clinical_item2018'):
    """
    Wrapper around the primary run function: does the conversion locally, then uploads to BQ.
    No batching is done for treatment team since the converted table is small.
    """
    conn = self.connFactory.connection()
    starrUtil = STARRUtil.StarrCommonUtils(self.bqClient)
    self.convertSourceItems(convOptions, conn)

    batchCounter = 99999  # TODO (nodir) why not 0?
    starrUtil.dumpPatientItemToCsv(tempDir, batchCounter)
    self.bqClient.reconnect_client()  # refresh BQ client connection
    starrUtil.uploadPatientItemCsvToBQ(tempDir, targetDatasetId, batchCounter)
    if removeCsvs:
        starrUtil.removePatientItemCsv(tempDir, batchCounter)
    starrUtil.removePatientItemAddedLines(SOURCE_TABLE)

    # For now keep the clinical_* tables; upload them once all tables have been converted
    starrUtil.dumpClinicalTablesToCsv(tempDir)
    starrUtil.uploadClinicalTablesCsvToBQ(tempDir, targetDatasetId)
    if removeCsvs:
        starrUtil.removeClinicalTablesCsv(tempDir)
    starrUtil.removeClinicalTablesAddedLines(SOURCE_TABLE)
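# Hedged usage sketch (not from the source): how convertAndUpload might be
# driven from a script entry point. The ConversionOptions class and its
# startDate/endDate fields are assumptions for illustration only; substitute
# whatever options object convertSourceItems actually expects.
def example_main():
    from datetime import datetime

    convOptions = ConversionOptions()             # hypothetical options holder
    convOptions.startDate = datetime(2018, 1, 1)  # convert items on/after this date
    convOptions.endDate = datetime(2019, 1, 1)    # convert items before this date

    converter = STARRTreatmentTeamConversion()
    # Runs the local conversion, dumps patient items to CSV in tempDir,
    # uploads them to the target BQ dataset, and cleans the CSVs up afterwards.
    converter.convertAndUpload(convOptions, tempDir='/tmp/', removeCsvs=True,
                               targetDatasetId='clinical_item2018')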
def setUp(self): """Prepare state for test cases""" DBTestCase.setUp(self) log.info("Sourcing from BigQuery DB") ClinicalItemDataLoader.build_clinical_item_psql_schemata() self.converter = STARRTreatmentTeamConversion() # Instance to test on self.bqConn = self.converter.bqConn self.starrUtil = STARRUtil.StarrCommonUtils(self.converter.bqClient)
def __init__(self): """Default constructor""" self.bqConn = bigQueryUtil.connection() self.bqClient = bigQueryUtil.BigQueryClient() self.connFactory = DBUtil.ConnectionFactory() # Default connection source self.starrUtil = STARRUtil.StarrCommonUtils(self.bqClient) self.categoryBySourceDescr = dict() self.clinicalItemByCategoryIdExtId = dict()
def setUp(self):
    """Prepare state for test cases"""
    log.setLevel(logging.INFO)  # without this no logs are printed
    DBTestCase.setUp(self)
    log.info("Sourcing from BigQuery DB")
    ClinicalItemDataLoader.build_clinical_item_psql_schemata()

    self.converter = STARRTreatmentTeamConversion.STARRTreatmentTeamConversion()  # Instance to test on
    self.bqConn = self.converter.bqConn
    self.starrUtil = STARRUtil.StarrCommonUtils(self.converter.bqClient)

    # point the converter to the dummy source table
    STARRTreatmentTeamConversion.SOURCE_TABLE = TEST_SOURCE_TABLE
def setUp(self): """Prepare state for test cases""" DBTestCase.setUp(self) log.info("Sourcing from BigQuery DB") ClinicalItemDataLoader.build_clinical_item_psql_schemata() self.converter = STARRTreatmentTeamConversion.STARRTreatmentTeamConversion() # Instance to test on self.bqConn = self.converter.bqConn self.starrUtil = STARRUtil.StarrCommonUtils(self.converter.bqClient) # point the converter to dummy source table STARRTreatmentTeamConversion.SOURCE_TABLE = TEST_SOURCE_TABLE log.warn("Removing test table, if exists: {}".format(TEST_SOURCE_TABLE)) bq_cursor = self.bqConn.cursor() bq_cursor.execute('DROP TABLE IF EXISTS {};'.format(TEST_SOURCE_TABLE))
def convertAndUpload(self, convOptions, tempDir='/tmp/', removeCsvs=True, datasetId='starr_datalake2018'):
    """
    Wrapper around the primary run function: does the conversion locally, then uploads to BQ.
    No batching is done for treatment team since the converted table is small.
    """
    conn = self.connFactory.connection()
    starrUtil = STARRUtil.StarrCommonUtils(self.bqClient)
    self.convertSourceItems(convOptions, conn)

    batchCounter = 99999
    starrUtil.dumpPatientItemToCsv(tempDir, batchCounter)
    self.bqClient.reconnect_client()  # refresh BQ client connection
    starrUtil.uploadPatientItemCsvToBQ(tempDir, datasetId, batchCounter)
    if removeCsvs:
        starrUtil.removePatientItemCsv(tempDir, batchCounter)
    starrUtil.removePatientItemAddedLines(SOURCE_TABLE)
def __init__(self): """Default constructor""" self.bqConn = bigQueryUtil.connection() self.bqClient = bigQueryUtil.BigQueryClient() self.connFactory = DBUtil.ConnectionFactory( ) # Default connection source, but Allow specification of alternative DB connection source self.starrUtil = STARRUtil.StarrCommonUtils(self.bqClient) self.categoryBySourceDescr = dict( ) # Local cache to track the clinical item category table contents self.clinicalItemByCategoryIdExtId = dict( ) # Local cache to track clinical item table contents self.itemCollectionByKeyStr = dict( ) # Local cache to track item collections self.itemCollectionItemByCollectionIdItemId = dict( ) # Local cache to track item collection items self.patient_items = dict() # Local cache of processed patient items self.patient_item_collection_links = set( ) # Local cache of processed patient item collection links
def setUp(self):
    """Prepare state for test cases"""
    log.setLevel(logging.INFO)  # without this no logs are printed
    DBTestCase.setUp(self)
    log.info("Sourcing from BigQuery DB")
    ClinicalItemDataLoader.build_clinical_item_psql_schemata()

    self.converter = STARROrderProcConversion.STARROrderProcConversion()  # Instance to test on
    self.bqConn = self.converter.bqConn
    self.starrUtil = STARRUtil.StarrCommonUtils(self.converter.bqClient)

    # point the converter to the dummy source tables
    STARROrderProcConversion.SOURCE_TABLE = TEST_SOURCE_TABLE
    STARROrderProcConversion.ORDERSET_TABLE = TEST_ORDERSET_TABLE
    STARROrderProcConversion.TARGET_DATASET_ID = TEST_DEST_DATASET

    log.warning("Removing test tables, if they exist: {} and {}".format(TEST_SOURCE_TABLE, TEST_ORDERSET_TABLE))
    bq_cursor = self.bqConn.cursor()
    bq_cursor.execute('DROP TABLE IF EXISTS {};'.format(TEST_SOURCE_TABLE))
    bq_cursor.execute('DROP TABLE IF EXISTS {};'.format(TEST_ORDERSET_TABLE))
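# Hedged follow-on sketch (assumption, not from the source): after setUp points
# SOURCE_TABLE at the dummy table, a test would typically create and seed it
# before running the conversion, reusing the same DB-API cursor the setUp above
# uses for DROP TABLE. The column names and values are placeholders; real test
# data would mirror the production source table's schema.
def load_test_source_table(self):
    bq_cursor = self.bqConn.cursor()
    bq_cursor.execute(
        'CREATE TABLE {} (anon_id STRING, order_proc_id_coded INT64, '
        'order_type STRING, order_time_jittered TIMESTAMP);'
        .format(TEST_SOURCE_TABLE))
    bq_cursor.execute(
        "INSERT INTO {} VALUES ('JC0001', 1, 'Lab', TIMESTAMP '2018-01-01 00:00:00');"
        .format(TEST_SOURCE_TABLE))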