Python qualify_tables Examples, validation.sql_wrangle.qualify_tables Python Examples

Example #1

0

Show file

    def test_qualify_tables(self):
        # Calls sql_wrangle.qualify_tables on dotted table references with two
        # different hpo_id values and checks the exact string returned.
        # Each case is (table reference, hpo_id, expected result).
        cases = (
            ('temp.some_table', 'fake', 'fake_temp_some_table'),
            ('synpuf_100.achilles_results', 'fake', 'fake_achilles_results'),
            ('temp.some_table', 'pitt_temple', 'pitt_temple_temp_some_table'),
            ('synpuf_100.achilles_results', 'pitt_temple',
             'pitt_temple_achilles_results'),
        )
        for table_ref, hpo_id, expected in cases:
            qualified = sql_wrangle.qualify_tables(table_ref, hpo_id=hpo_id)
            self.assertEqual(qualified, expected)

Example #2

0

Show file

File: achilles_heel.py Project: dcampbell-vumc/curation

def _get_heel_commands(hpo_id):
    """Yield the queries extracted from ACHILLES_HEEL_DML, table-qualified.

    Every item produced by _extract_sql_queries is passed through
    sql_wrangle.qualify_tables before the first result is yielded, so a
    failure while qualifying any item surfaces before anything is produced.

    :param hpo_id: forwarded as the second argument to qualify_tables
    :return: generator over the qualified items, in their original order
    """
    qualified = []
    for raw_sql in _extract_sql_queries(ACHILLES_HEEL_DML):
        qualified.append(sql_wrangle.qualify_tables(raw_sql, hpo_id))
    for statement in qualified:
        yield statement

Example #3

0

Show file

    def test_measurement_concept_sets_table(self):
        # Queries every row of MEASUREMENT_CONCEPT_SETS_TABLE in the test
        # dataset, then checks (1) the lower-cased name/type of each returned
        # schema field against resources.fields_for, and (2) the reported
        # totalRows against the number of entries csv_to_list returns for the
        # matching '.csv' resource.

        def name_and_type(field):
            # Reduce a field description to its lower-cased name and type.
            return {
                'name': field['name'].lower(),
                'type': field['type'].lower()
            }

        select_all = '''SELECT * FROM {dataset_id}.{table_id}'''.format(
            dataset_id=self.dataset_id,
            table_id=MEASUREMENT_CONCEPT_SETS_TABLE)
        response = bq_utils.query(sql_wrangle.qualify_tables(select_all))

        actual_fields = [
            name_and_type(field) for field in response['schema']['fields']
        ]
        expected_fields = [
            name_and_type(field)
            for field in resources.fields_for(MEASUREMENT_CONCEPT_SETS_TABLE)
        ]
        self.assertListEqual(expected_fields, actual_fields)

        csv_path = os.path.join(resources.resource_path,
                                MEASUREMENT_CONCEPT_SETS_TABLE + '.csv')
        expected_total_rows = len(resources.csv_to_list(csv_path))
        self.assertEqual(expected_total_rows, int(response['totalRows']))

Example #4

0

Show file

File: achilles_test.py Project: rfrancis1/curation

 def test_load_analyses(self):
     # After creating the achilles tables and loading the analyses for
     # FAKE_HPO_ID, the number of rows returned by a DISTINCT analysis_id
     # query must equal ACHILLES_LOOKUP_COUNT.
     achilles.create_tables(FAKE_HPO_ID, True)
     achilles.load_analyses(FAKE_HPO_ID)
     distinct_ids_sql = (
         'SELECT DISTINCT(analysis_id) FROM %sachilles_analysis' %
         sql_wrangle.PREFIX_PLACEHOLDER)
     qualified_sql = sql_wrangle.qualify_tables(distinct_ids_sql, FAKE_HPO_ID)
     response = bq_utils.query(qualified_sql)
     self.assertEqual(ACHILLES_LOOKUP_COUNT, int(response['totalRows']))

Example #5

0

Show file

File: achilles_test.py Project: rfrancis1/curation

 def test_run_analyses(self):
     # Long-running test
     # Loads the dataset, creates the achilles tables, loads and runs the
     # analyses for FAKE_HPO_ID, then checks that COUNT(1) over the
     # achilles_results table equals ACHILLES_RESULTS_COUNT.
     self._load_dataset()
     achilles.create_tables(FAKE_HPO_ID, True)
     achilles.load_analyses(FAKE_HPO_ID)
     achilles.run_analyses(hpo_id=FAKE_HPO_ID)
     count_sql = ('SELECT COUNT(1) FROM %sachilles_results' %
                  sql_wrangle.PREFIX_PLACEHOLDER)
     response = bq_utils.query(
         sql_wrangle.qualify_tables(count_sql, FAKE_HPO_ID))
     # The count is the single value of the single row in the raw response.
     first_cell = response['rows'][0]['f'][0]['v']
     self.assertEqual(int(first_cell), ACHILLES_RESULTS_COUNT)

Example #6

0

Show file

    def test_get_lab_concept_summary_query(self):
        # Runs the query built by required_labs.get_lab_concept_summary_query
        # for FAKE_HPO_ID and cross-checks its row counts against two
        # independent queries on the descendants table.
        summary_response = bq_utils.query(
            required_labs.get_lab_concept_summary_query(FAKE_HPO_ID))
        submitted_labs = []
        for row in bq_utils.response2rows(summary_response):
            if row['measurement_concept_id_exists'] == 1:
                submitted_labs.append(row)
        actual_total_labs = summary_response['totalRows']

        # Total number of required labs: the distinct ancestor concepts. The
        # summary query should return one row per required lab, whether it
        # was submitted or is missing.
        ancestor_sql = """SELECT DISTINCT ancestor_concept_id FROM `{project_id}.{dataset_id}.{table_id}`""".format(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE)
        ancestor_response = bq_utils.query(
            sql_wrangle.qualify_tables(ancestor_sql))
        expected_total_labs = ancestor_response['totalRows']

        # Labs present in the fake site's measurement table: distinct ancestor
        # concepts with at least one descendant among its measurement concept
        # ids. This should match the number of labs flagged as submitted.
        measurement_table = bq_utils.get_table_id(FAKE_HPO_ID,
                                                  common.MEASUREMENT)
        measurement_join_template = '''
                SELECT
                  DISTINCT c.ancestor_concept_id
                FROM
                  `{project_id}.{dataset_id}.{measurement_concept_sets_descendants}` AS c
                JOIN
                  `{project_id}.{dataset_id}.{measurement}` AS m
                ON
                  c.descendant_concept_id = m.measurement_concept_id
                '''
        measurement_join_sql = measurement_join_template.format(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            measurement_concept_sets_descendants=
            MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE,
            measurement=measurement_table)
        measurement_response = bq_utils.query(measurement_join_sql)
        measured_total_labs = measurement_response['totalRows']

        self.assertEqual(int(expected_total_labs),
                         int(actual_total_labs),
                         msg='Compare the total number of labs')
        self.assertEqual(
            int(measured_total_labs),
            len(submitted_labs),
            msg='Compare the number of labs submitted in the measurement')

Example #7

0

Show file

    def test_load_measurement_concept_sets_descendants_table(self):
        # Queries every row of MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE in
        # the test dataset and checks that the lower-cased name/type of each
        # returned schema field matches what resources.fields_for reports.

        def name_and_type(field):
            # Reduce a field description to its lower-cased name and type.
            return {
                'name': field['name'].lower(),
                'type': field['type'].lower()
            }

        select_all = """SELECT * FROM {dataset_id}.{table_id}""".format(
            dataset_id=self.dataset_id,
            table_id=MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE)
        response = bq_utils.query(sql_wrangle.qualify_tables(select_all))

        actual_fields = [
            name_and_type(field) for field in response['schema']['fields']
        ]
        expected_fields = [
            name_and_type(field) for field in resources.fields_for(
                MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE)
        ]

        self.assertListEqual(expected_fields, actual_fields)

Example #8

0

Show file

    def test_heel_analyses(self, mock_hpo_bucket):
        # Long-running test
        # Runs achilles heel against a randomly suffixed copy of the fake HPO,
        # checks the row counts of the two heel output tables, then checks
        # that every analysis_id reported under each warning category
        # (ERROR / WARNING / NOTIFICATION) belongs to that category's list.
        mock_hpo_bucket.return_value = self.get_mock_hpo_bucket()

        # create randomized tables to bypass BQ rate limits
        randomized_hpo_id = FAKE_HPO_ID + '_' + str(randint(10000, 99999))

        # prepare
        self._load_dataset(randomized_hpo_id)
        test_util.populate_achilles(hpo_id=randomized_hpo_id,
                                    include_heel=False)

        # define tables
        table_prefix = randomized_hpo_id + '_'
        achilles_heel_results = table_prefix + achilles_heel.ACHILLES_HEEL_RESULTS
        achilles_results_derived = table_prefix + achilles_heel.ACHILLES_RESULTS_DERIVED

        # run achilles heel
        achilles_heel.create_tables(randomized_hpo_id, True)
        achilles_heel.run_heel(hpo_id=randomized_hpo_id)

        # validate row counts of both output tables
        expected_counts = (
            (achilles_heel_results, ACHILLES_HEEL_RESULTS_COUNT),
            (achilles_results_derived, ACHILLES_RESULTS_DERIVED_COUNT),
        )
        for table_name, expected_count in expected_counts:
            count_sql = sql_wrangle.qualify_tables(
                'SELECT COUNT(1) as num_rows FROM %s' % table_name)
            count_rows = bq_utils.response2rows(bq_utils.query(count_sql))
            self.assertEqual(expected_count, count_rows[0]['num_rows'])

        # test new heel re-categorization
        errors = [
            2, 4, 5, 101, 200, 206, 207, 209, 400, 405, 406, 409, 411, 413,
            500, 505, 506, 509, 600, 605, 606, 609, 613, 700, 705, 706, 709,
            711, 713, 715, 716, 717, 800, 805, 806, 809, 813, 814, 906, 1006,
            1609, 1805
        ]
        error_sql = """SELECT analysis_id FROM {table_id}
            WHERE achilles_heel_warning LIKE 'ERROR:%'
            GROUP BY analysis_id"""

        warnings = [
            4, 5, 7, 8, 9, 200, 210, 302, 400, 402, 412, 420, 500, 511, 512,
            513, 514, 515, 602, 612, 620, 702, 712, 720, 802, 812, 820
        ]
        warning_sql = """SELECT analysis_id FROM {table_id}
            WHERE achilles_heel_warning LIKE 'WARNING:%'
            GROUP BY analysis_id"""

        notifications = [
            101, 103, 105, 114, 115, 118, 208, 301, 410, 610, 710, 810, 900,
            907, 1000, 1800, 1807
        ]
        notification_sql = """SELECT analysis_id FROM {table_id}
            WHERE achilles_heel_warning LIKE 'NOTIFICATION:%' and analysis_id is not null
            GROUP BY analysis_id"""

        # Each category is checked in turn: every analysis_id the heel results
        # report for it must appear in the corresponding allowed list.
        categories = (
            (error_sql, errors),
            (warning_sql, warnings),
            (notification_sql, notifications),
        )
        for sql_template, allowed_ids in categories:
            category_sql = sql_wrangle.qualify_tables(
                sql_template.format(table_id=achilles_heel_results))
            category_rows = bq_utils.response2rows(
                bq_utils.query(category_sql))
            reported_ids = [row["analysis_id"] for row in category_rows]
            for analysis_id in reported_ids:
                self.assertIn(analysis_id, allowed_ids)

Example #9

0

Show file

File: achilles.py Project: dcampbell-vumc/curation

def _get_run_analysis_commands(hpo_id):
    """Return the commands read from ACHILLES_DML_SQL_PATH, table-qualified.

    Each item returned by sql_wrangle.get_commands is passed through
    sql_wrangle.qualify_tables together with the given hpo_id.

    :param hpo_id: forwarded as the second argument to qualify_tables
    :return: list of qualified commands, in the order get_commands gave them
    """
    qualified = []
    for raw_sql in sql_wrangle.get_commands(ACHILLES_DML_SQL_PATH):
        qualified.append(sql_wrangle.qualify_tables(raw_sql, hpo_id))
    return qualified