def uploadPatientItemCsvToBQ(self, tempDir, datasetId, batchCounter=999):
    """Append one batch's patient_item CSV to the BigQuery dataset.

    The CSV header is validated against a hand-written schema before the
    load; the load itself passes that explicit schema to BigQuery.

    Args:
        tempDir: directory containing the batch CSV files.
        datasetId: target BigQuery dataset id.
        batchCounter: batch number used as the CSV filename prefix.
    """
    log.info('Uploading patient_item CSV to BQ dataset %s for batch %s' % (datasetId, batchCounter))

    # (name, type, mode) triples describing the patient_item table layout.
    column_specs = [
        ('patient_item_id', 'INT64', 'REQUIRED'),
        ('external_id', 'INT64', 'NULLABLE'),
        ('patient_id', 'INT64', 'REQUIRED'),
        ('clinical_item_id', 'INT64', 'REQUIRED'),
        ('item_date', 'TIMESTAMP', 'REQUIRED'),
        ('analyze_date', 'TIMESTAMP', 'NULLABLE'),
        ('encounter_id', 'INT64', 'NULLABLE'),
        ('text_value', 'STRING', 'NULLABLE'),
        ('num_value', 'FLOAT64', 'NULLABLE'),
        ('source_id', 'INT64', 'NULLABLE'),
        ('item_date_utc', 'TIMESTAMP', 'NULLABLE'),
    ]
    patient_item_schema = [
        bigquery.SchemaField(col_name, col_type, col_mode, None, ())
        for col_name, col_type, col_mode in column_specs
    ]

    csv_path = tempDir + os.path.sep + str(batchCounter) + '_patient_item.csv'

    # Fail fast if the CSV columns do not line up with the expected schema.
    bigQueryUtil.headerChecker(csv_path, [field.name for field in patient_item_schema])

    self.bqClient.load_csv_to_table(datasetId, 'patient_item', csv_path,
                                    schema=patient_item_schema, skip_rows=1,
                                    append_to_table=True)
def uploadClinicalTablesCsvToBQ(self, tempDir, datasetId):
    """Append the clinical_item_category and clinical_item CSVs to BigQuery.

    Headers are checked against hand-written schemas, but the loads
    themselves do not pass an explicit schema (BigQuery auto-detection).

    NOTE(review): this method is redefined later in the file with the same
    name; in Python the later definition wins, so this version is shadowed.

    Args:
        tempDir: directory containing the CSV files.
        datasetId: target BigQuery dataset id.
    """
    log.info('Uploading clinical_item_category CSV to BQ dataset %s' % datasetId)

    # clinical_item_category: (name, type, mode) triples, used only for the
    # header check below — not passed to the load call.
    category_specs = [
        ('clinical_item_category_id', 'INT64', 'REQUIRED'),
        ('source_table', 'STRING', 'REQUIRED'),
        ('description', 'STRING', 'NULLABLE'),
        ('default_recommend', 'INT64', 'NULLABLE'),
    ]
    clinical_item_category_schema = [
        bigquery.SchemaField(col_name, col_type, col_mode, None, ())
        for col_name, col_type, col_mode in category_specs
    ]
    clinical_item_category_csv_path = tempDir + '/clinical_item_category.csv'
    bigQueryUtil.headerChecker(clinical_item_category_csv_path,
                               [field.name for field in clinical_item_category_schema])
    self.bqClient.load_csv_to_table(datasetId, 'clinical_item_category',
                                    clinical_item_category_csv_path,
                                    skip_rows=1, append_to_table=True)

    log.info('Uploading clinical_item CSV to BQ dataset %s' % datasetId)

    # clinical_item: same pattern — schema exists only for header validation.
    item_specs = [
        ('clinical_item_id', 'INT64', 'REQUIRED'),
        ('clinical_item_category_id', 'INT64', 'REQUIRED'),
        ('external_id', 'INT64', 'NULLABLE'),
        ('name', 'STRING', 'REQUIRED'),
        ('description', 'STRING', 'NULLABLE'),
        ('default_recommend', 'INT64', 'NULLABLE'),
        ('item_count', 'FLOAT64', 'NULLABLE'),
        ('patient_count', 'FLOAT64', 'NULLABLE'),
        ('encounter_count', 'FLOAT64', 'NULLABLE'),
        ('analysis_status', 'INT64', 'NULLABLE'),
        ('outcome_interest', 'INT64', 'NULLABLE'),
    ]
    clinical_item_schema = [
        bigquery.SchemaField(col_name, col_type, col_mode, None, ())
        for col_name, col_type, col_mode in item_specs
    ]
    clinical_item_csv_path = tempDir + '/clinical_item.csv'
    bigQueryUtil.headerChecker(clinical_item_csv_path,
                               [field.name for field in clinical_item_schema])
    self.bqClient.load_csv_to_table(datasetId, 'clinical_item',
                                    clinical_item_csv_path,
                                    skip_rows=1, append_to_table=True)
def uploadClinicalTablesCsvToBQ(self, tempDir, datasetId):
    """Append the clinical_item_category and clinical_item CSVs to BigQuery.

    Schemas are fetched live from the reference tables in the
    'clinical_item2018' dataset of the 'mining-clinical-decisions' project
    and used only to validate the CSV headers; the loads themselves do not
    pass an explicit schema.

    Args:
        tempDir: directory containing the CSV files.
        datasetId: target BigQuery dataset id.
    """
    def _reference_schema(table_name):
        # Pull the authoritative schema from the hard-coded reference dataset.
        dataset_ref = self.bqClient.client.dataset('clinical_item2018',
                                                   'mining-clinical-decisions')
        return self.bqClient.client.get_table(dataset_ref.table(table_name)).schema

    log.info('Uploading clinical_item_category CSV to BQ dataset %s' % datasetId)
    clinical_item_category_schema = _reference_schema('clinical_item_category')
    clinical_item_category_csv_path = tempDir + '/clinical_item_category.csv'
    bigQueryUtil.headerChecker(clinical_item_category_csv_path,
                               [field.name for field in clinical_item_category_schema])
    self.bqClient.load_csv_to_table(datasetId, 'clinical_item_category',
                                    clinical_item_category_csv_path,
                                    skip_rows=1, append_to_table=True)

    log.info('Uploading clinical_item CSV to BQ dataset %s' % datasetId)
    clinical_item_schema = _reference_schema('clinical_item')
    clinical_item_csv_path = tempDir + '/clinical_item.csv'
    bigQueryUtil.headerChecker(clinical_item_csv_path,
                               [field.name for field in clinical_item_schema])
    self.bqClient.load_csv_to_table(datasetId, 'clinical_item',
                                    clinical_item_csv_path,
                                    skip_rows=1, append_to_table=True)
def uploadPatientItemCsvToBQ(self, tempDir, batchCounter, datasetId):
    """Append one batch's patient_item CSV to the BigQuery dataset.

    The schema is fetched live from the reference patient_item table in the
    'clinical_item2018' dataset of 'mining-clinical-decisions' and used only
    to validate the CSV header; the load itself passes no explicit schema.

    Args:
        tempDir: directory containing the batch CSV files.
        batchCounter: batch number used as the CSV filename prefix.
        datasetId: target BigQuery dataset id.
    """
    log.info('Uploading patient_item CSV to BQ dataset %s for batch %s' % (datasetId, batchCounter))

    # Authoritative column list comes from the reference table itself.
    reference_table = self.bqClient.client.dataset(
        'clinical_item2018', 'mining-clinical-decisions').table('patient_item')
    patient_item_schema = self.bqClient.client.get_table(reference_table).schema

    csv_path = tempDir + '/' + str(batchCounter) + '_patient_item.csv'
    bigQueryUtil.headerChecker(csv_path, [field.name for field in patient_item_schema])

    self.bqClient.load_csv_to_table(datasetId, 'patient_item', csv_path,
                                    skip_rows=1, append_to_table=True)
def uploadItemCollectionTablesCsvToBQ(self, tempDir, datasetId):
    """Append the item_collection and item_collection_item CSVs to BigQuery.

    Both tables follow the identical sequence: fetch the filtered reference
    schema, validate the CSV header against it, then load with that explicit
    schema (auto-detection disabled).

    Args:
        tempDir: directory containing the CSV files.
        datasetId: target BigQuery dataset id.
    """
    for table_name in ('item_collection', 'item_collection_item'):
        log.info('Uploading {} CSV to BQ dataset {}'.format(table_name, datasetId))

        # Schema filtered from the clinical_item2018 reference dataset.
        table_schema = self.get_schema_filtered('clinical_item2018', table_name)

        csv_path = tempDir + '/' + table_name + '.csv'
        bigQueryUtil.headerChecker(csv_path, [field.name for field in table_schema])

        self.bqClient.load_csv_to_table(datasetId, table_name, csv_path,
                                        skip_rows=1, append_to_table=True,
                                        auto_detect_schema=False,
                                        schema=table_schema)
def uploadPatientItemCollectionLinkCsvToBQ(self, tempDir, datasetId, batchCounter=999):
    """Append one batch's patient_item_collection_link CSV to BigQuery.

    Fetches the filtered reference schema, validates the CSV header against
    it, then loads with that explicit schema (auto-detection disabled).

    Args:
        tempDir: directory containing the batch CSV files.
        datasetId: target BigQuery dataset id.
        batchCounter: batch number used as the CSV filename prefix.
    """
    # Fix: the original log line said "patient_item CSV" (copy-paste from the
    # patient_item uploader), which misidentified what this method uploads.
    log.info(
        'Uploading patient_item_collection_link CSV to BQ dataset {} for batch {}'.format(
            datasetId, batchCounter))

    # Schema filtered from the clinical_item2018 reference dataset.
    patient_item_collection_link_schema = self.get_schema_filtered(
        'clinical_item2018', 'patient_item_collection_link')

    csv_path = tempDir + os.path.sep + str(
        batchCounter) + '_patient_item_collection_link.csv'

    # Fail fast if the CSV columns do not line up with the expected schema.
    bigQueryUtil.headerChecker(
        csv_path, [sf.name for sf in patient_item_collection_link_schema])

    self.bqClient.load_csv_to_table(
        datasetId, 'patient_item_collection_link', csv_path,
        skip_rows=1, append_to_table=True, auto_detect_schema=False,
        schema=patient_item_collection_link_schema)