Example No. 1
0
    def uploadPatientItemCsvToBQ(self, tempDir, datasetId, batchCounter=999):
        """Append one batch's patient_item CSV from tempDir to the BQ dataset.

        The CSV header row is validated against the expected column names,
        then skipped during the load.
        """
        log.info('Uploading patient_item CSV to BQ dataset %s for batch %s' %
                 (datasetId, batchCounter))

        # (column name, BQ type, mode) triples for the patient_item table.
        column_specs = (
            ('patient_item_id', 'INT64', 'REQUIRED'),
            ('external_id', 'INT64', 'NULLABLE'),
            ('patient_id', 'INT64', 'REQUIRED'),
            ('clinical_item_id', 'INT64', 'REQUIRED'),
            ('item_date', 'TIMESTAMP', 'REQUIRED'),
            ('analyze_date', 'TIMESTAMP', 'NULLABLE'),
            ('encounter_id', 'INT64', 'NULLABLE'),
            ('text_value', 'STRING', 'NULLABLE'),
            ('num_value', 'FLOAT64', 'NULLABLE'),
            ('source_id', 'INT64', 'NULLABLE'),
            ('item_date_utc', 'TIMESTAMP', 'NULLABLE'),
        )
        schema = [
            bigquery.SchemaField(col_name, col_type, col_mode, None, ())
            for col_name, col_type, col_mode in column_specs
        ]

        csv_path = tempDir + os.path.sep + str(batchCounter) + '_patient_item.csv'

        # Fail fast if the CSV header row does not match the schema columns.
        bigQueryUtil.headerChecker(csv_path, [sf.name for sf in schema])

        self.bqClient.load_csv_to_table(datasetId,
                                        'patient_item',
                                        csv_path,
                                        schema=schema,
                                        skip_rows=1,
                                        append_to_table=True)
Example No. 2
0
    def uploadClinicalTablesCsvToBQ(self, tempDir, datasetId):
        """Append the clinical_item_category and clinical_item CSVs in tempDir
        to their BQ tables, validating each CSV header first."""
        log.info('Uploading clinical_item_category CSV to BQ dataset %s' % datasetId)
        category_schema = [
            bigquery.SchemaField(field_name, field_type, field_mode, None, ())
            for field_name, field_type, field_mode in (
                ('clinical_item_category_id', 'INT64', 'REQUIRED'),
                ('source_table', 'STRING', 'REQUIRED'),
                ('description', 'STRING', 'NULLABLE'),
                ('default_recommend', 'INT64', 'NULLABLE'),
            )
        ]

        category_csv = tempDir + '/clinical_item_category.csv'

        bigQueryUtil.headerChecker(category_csv, [sf.name for sf in category_schema])

        # NOTE(review): the explicit schema is only used for header validation;
        # it is deliberately NOT passed to the load (see commented-out kwargs).
        self.bqClient.load_csv_to_table(datasetId, 'clinical_item_category', category_csv,
                                        skip_rows=1, append_to_table=True)
        # auto_detect_schema=False, schema=category_schema)

        log.info('Uploading clinical_item CSV to BQ dataset %s' % datasetId)
        item_schema = [
            bigquery.SchemaField(field_name, field_type, field_mode, None, ())
            for field_name, field_type, field_mode in (
                ('clinical_item_id', 'INT64', 'REQUIRED'),
                ('clinical_item_category_id', 'INT64', 'REQUIRED'),
                ('external_id', 'INT64', 'NULLABLE'),
                ('name', 'STRING', 'REQUIRED'),
                ('description', 'STRING', 'NULLABLE'),
                ('default_recommend', 'INT64', 'NULLABLE'),
                ('item_count', 'FLOAT64', 'NULLABLE'),
                ('patient_count', 'FLOAT64', 'NULLABLE'),
                ('encounter_count', 'FLOAT64', 'NULLABLE'),
                ('analysis_status', 'INT64', 'NULLABLE'),
                ('outcome_interest', 'INT64', 'NULLABLE'),
            )
        ]

        item_csv = tempDir + '/clinical_item.csv'

        bigQueryUtil.headerChecker(item_csv, [sf.name for sf in item_schema])

        self.bqClient.load_csv_to_table(datasetId, 'clinical_item', item_csv,
                                        skip_rows=1, append_to_table=True)
    def uploadClinicalTablesCsvToBQ(self, tempDir, datasetId):
        """Append the clinical_item_category and clinical_item CSVs in tempDir
        to their BQ tables.

        The expected column layout for each header check is fetched from the
        corresponding reference table in the mining-clinical-decisions
        clinical_item2018 dataset.
        """
        client = self.bqClient.client
        reference_dataset = client.dataset('clinical_item2018',
                                           'mining-clinical-decisions')

        # Both tables follow the same upload recipe: fetch reference schema,
        # validate the CSV header, then append with the header row skipped.
        for table_name in ('clinical_item_category', 'clinical_item'):
            log.info('Uploading %s CSV to BQ dataset %s' % (table_name, datasetId))
            reference_schema = client.get_table(
                reference_dataset.table(table_name)).schema

            csv_path = tempDir + '/' + table_name + '.csv'

            bigQueryUtil.headerChecker(csv_path,
                                       [sf.name for sf in reference_schema])

            self.bqClient.load_csv_to_table(datasetId, table_name, csv_path,
                                            skip_rows=1, append_to_table=True)
    def uploadPatientItemCsvToBQ(self, tempDir, batchCounter, datasetId):
        """Append one batch's patient_item CSV from tempDir to the BQ dataset.

        The header check uses the schema of the reference patient_item table
        in the mining-clinical-decisions clinical_item2018 dataset.
        """
        log.info('Uploading patient_item CSV to BQ dataset %s for batch %s' % (datasetId, batchCounter))

        client = self.bqClient.client
        reference_table = client.dataset(
            'clinical_item2018', 'mining-clinical-decisions').table('patient_item')
        reference_schema = client.get_table(reference_table).schema

        batch_csv = tempDir + '/' + str(batchCounter) + '_patient_item.csv'

        # Fail fast if the CSV header row does not match the reference columns.
        bigQueryUtil.headerChecker(batch_csv, [field.name for field in reference_schema])

        self.bqClient.load_csv_to_table(datasetId, 'patient_item', batch_csv, skip_rows=1, append_to_table=True)
Example No. 5
0
    def uploadItemCollectionTablesCsvToBQ(self, tempDir, datasetId):
        """Append the item_collection and item_collection_item CSVs in tempDir
        to their BQ tables, using the filtered clinical_item2018 schemas."""
        # Both tables follow the same recipe: fetch filtered schema, validate
        # the CSV header, then append with the header row skipped.
        for table_name in ('item_collection', 'item_collection_item'):
            log.info('Uploading {} CSV to BQ dataset {}'.format(
                table_name, datasetId))
            table_schema = self.get_schema_filtered('clinical_item2018',
                                                    table_name)

            csv_path = tempDir + '/' + table_name + '.csv'

            bigQueryUtil.headerChecker(csv_path,
                                       [sf.name for sf in table_schema])

            self.bqClient.load_csv_to_table(datasetId,
                                            table_name,
                                            csv_path,
                                            skip_rows=1,
                                            append_to_table=True,
                                            auto_detect_schema=False,
                                            schema=table_schema)
Example No. 6
0
    def uploadPatientItemCollectionLinkCsvToBQ(self,
                                               tempDir,
                                               datasetId,
                                               batchCounter=999):
        """Append one batch's patient_item_collection_link CSV from tempDir
        to the BQ dataset, using the filtered clinical_item2018 schema."""
        log.info(
            'Uploading patient_item CSV to BQ dataset {} for batch {}'.format(
                datasetId, batchCounter))
        link_schema = self.get_schema_filtered('clinical_item2018',
                                               'patient_item_collection_link')

        link_csv = (tempDir + os.path.sep + str(batchCounter) +
                    '_patient_item_collection_link.csv')

        # Fail fast if the CSV header row does not match the schema columns.
        bigQueryUtil.headerChecker(link_csv,
                                   [field.name for field in link_schema])

        self.bqClient.load_csv_to_table(datasetId,
                                        'patient_item_collection_link',
                                        link_csv,
                                        skip_rows=1,
                                        append_to_table=True,
                                        auto_detect_schema=False,
                                        schema=link_schema)