def test_qualify_tables(self):
    """Check how qualify_tables rewrites table references for an HPO id.

    For each HPO id, a ``temp.``-qualified table is expected to come back
    as ``<hpo_id>_temp_<table>``, while a ``synpuf_100.``-qualified table
    is expected to come back as ``<hpo_id>_<table>``.
    """
    # (input sql, hpo_id, expected qualified output)
    cases = (
        ('temp.some_table', 'fake', 'fake_temp_some_table'),
        ('synpuf_100.achilles_results', 'fake', 'fake_achilles_results'),
        ('temp.some_table', 'pitt_temple', 'pitt_temple_temp_some_table'),
        ('synpuf_100.achilles_results', 'pitt_temple',
         'pitt_temple_achilles_results'),
    )
    for sql_text, site_id, expected in cases:
        qualified = sql_wrangle.qualify_tables(sql_text, hpo_id=site_id)
        self.assertEqual(qualified, expected)
def _get_heel_commands(hpo_id):
    """Yield the Achilles Heel commands qualified for ``hpo_id``.

    The queries are extracted from ``ACHILLES_HEEL_DML`` and each one is
    passed through ``sql_wrangle.qualify_tables``. All of them are qualified
    before the first command is yielded.

    :param hpo_id: identifier forwarded to ``sql_wrangle.qualify_tables``
    :return: generator over the qualified commands
    """
    qualified_commands = []
    for raw_sql in _extract_sql_queries(ACHILLES_HEEL_DML):
        qualified_commands.append(sql_wrangle.qualify_tables(raw_sql, hpo_id))
    yield from qualified_commands
def test_measurement_concept_sets_table(self):
    """Compare the loaded measurement concept sets table to its resources.

    The schema returned by a ``SELECT *`` on the table must match the field
    definitions from ``resources.fields_for`` (names and types compared
    case-insensitively), and the reported row count must equal the number
    of rows read from the table's CSV resource file.
    """

    def name_and_type(field):
        # Keep only the lower-cased name and type of a field definition
        return {
            'name': field['name'].lower(),
            'type': field['type'].lower()
        }

    select_all = '''SELECT * FROM {dataset_id}.{table_id}'''.format(
        dataset_id=self.dataset_id,
        table_id=MEASUREMENT_CONCEPT_SETS_TABLE)
    response = bq_utils.query(sql_wrangle.qualify_tables(select_all))

    # schema check
    actual_fields = [
        name_and_type(field) for field in response['schema']['fields']
    ]
    expected_fields = [
        name_and_type(field)
        for field in resources.fields_for(MEASUREMENT_CONCEPT_SETS_TABLE)
    ]
    self.assertListEqual(expected_fields, actual_fields)

    # row count check
    csv_path = os.path.join(resources.resource_path,
                            MEASUREMENT_CONCEPT_SETS_TABLE + '.csv')
    csv_rows = resources.csv_to_list(csv_path)
    self.assertEqual(len(csv_rows), int(response['totalRows']))
def test_load_analyses(self):
    """Load the Achilles analyses and check how many distinct ids exist.

    After creating the tables and loading the analyses for ``FAKE_HPO_ID``,
    the number of rows returned by a distinct ``analysis_id`` query must
    equal ``ACHILLES_LOOKUP_COUNT``.
    """
    achilles.create_tables(FAKE_HPO_ID, True)
    achilles.load_analyses(FAKE_HPO_ID)

    unqualified_sql = ('SELECT DISTINCT(analysis_id) FROM %sachilles_analysis'
                       % sql_wrangle.PREFIX_PLACEHOLDER)
    distinct_ids_sql = sql_wrangle.qualify_tables(unqualified_sql, FAKE_HPO_ID)
    response = bq_utils.query(distinct_ids_sql)

    distinct_id_count = int(response['totalRows'])
    self.assertEqual(ACHILLES_LOOKUP_COUNT, distinct_id_count)
def test_run_analyses(self):
    """Run the Achilles analyses and check the size of the results table.

    After loading the dataset, creating the tables, loading and running the
    analyses for ``FAKE_HPO_ID``, the ``COUNT(1)`` of the achilles results
    table must equal ``ACHILLES_RESULTS_COUNT``.
    """
    # Long-running test
    self._load_dataset()
    achilles.create_tables(FAKE_HPO_ID, True)
    achilles.load_analyses(FAKE_HPO_ID)
    achilles.run_analyses(hpo_id=FAKE_HPO_ID)

    unqualified_sql = ('SELECT COUNT(1) FROM %sachilles_results' %
                       sql_wrangle.PREFIX_PLACEHOLDER)
    count_sql = sql_wrangle.qualify_tables(unqualified_sql, FAKE_HPO_ID)
    response = bq_utils.query(count_sql)

    # The count is the value of the first cell of the first returned row
    first_row = response['rows'][0]
    first_cell = first_row['f'][0]
    self.assertEqual(int(first_cell['v']), ACHILLES_RESULTS_COUNT)
def test_get_lab_concept_summary_query(self):
    """Validate the lab concept summary query for ``FAKE_HPO_ID``.

    Two things are checked:
      * the total number of rows in the summary equals the number of
        distinct ancestor concepts in the descendants table;
      * the number of summary rows flagged with
        ``measurement_concept_id_exists == 1`` equals the number of distinct
        ancestor concepts that join to the site's measurement table.
    """
    summary_response = bq_utils.query(
        required_labs.get_lab_concept_summary_query(FAKE_HPO_ID))
    submitted_labs = []
    for summary_row in bq_utils.response2rows(summary_response):
        if summary_row['measurement_concept_id_exists'] == 1:
            submitted_labs.append(summary_row)
    actual_total_labs = summary_response['totalRows']

    # Count the total number of labs required, this number should be equal to the total number of rows in the
    # results generated by get_lab_concept_summary_query including the submitted and missing labs.
    required_labs_template = (
        "SELECT DISTINCT ancestor_concept_id FROM "
        "`{project_id}.{dataset_id}.{table_id}`")
    required_labs_sql = sql_wrangle.qualify_tables(
        required_labs_template.format(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE))
    required_labs_response = bq_utils.query(required_labs_sql)
    expected_total_labs = required_labs_response['totalRows']

    # Count the number of labs in the measurement table, this number should be equal to the number of labs
    # submitted by the fake site
    measurement_labs_template = (
        " SELECT DISTINCT c.ancestor_concept_id FROM"
        " `{project_id}.{dataset_id}.{measurement_concept_sets_descendants}`"
        " AS c JOIN `{project_id}.{dataset_id}.{measurement}` AS m"
        " ON c.descendant_concept_id = m.measurement_concept_id ")
    measurement_labs_sql = measurement_labs_template.format(
        project_id=self.project_id,
        dataset_id=self.dataset_id,
        measurement_concept_sets_descendants=
        MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE,
        measurement=bq_utils.get_table_id(FAKE_HPO_ID, common.MEASUREMENT))
    measurement_labs_response = bq_utils.query(measurement_labs_sql)
    measurement_total_labs = measurement_labs_response['totalRows']

    self.assertEqual(int(expected_total_labs),
                     int(actual_total_labs),
                     msg='Compare the total number of labs')
    self.assertEqual(
        int(measurement_total_labs),
        len(submitted_labs),
        msg='Compare the number of labs submitted in the measurement')
def test_load_measurement_concept_sets_descendants_table(self):
    """Compare the descendants table schema to its resource definition.

    The schema returned by a ``SELECT *`` on the measurement concept sets
    descendants table must match the field definitions from
    ``resources.fields_for`` (names and types compared case-insensitively).
    """

    def name_and_type(field):
        # Keep only the lower-cased name and type of a field definition
        return {
            'name': field['name'].lower(),
            'type': field['type'].lower()
        }

    select_all = """SELECT * FROM {dataset_id}.{table_id}""".format(
        dataset_id=self.dataset_id,
        table_id=MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE)
    response = bq_utils.query(sql_wrangle.qualify_tables(select_all))

    actual_fields = [
        name_and_type(field) for field in response['schema']['fields']
    ]
    expected_fields = [
        name_and_type(field) for field in resources.fields_for(
            MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE)
    ]
    self.assertListEqual(expected_fields, actual_fields)
def test_heel_analyses(self, mock_hpo_bucket):
    """Run Achilles Heel for a randomized HPO id and validate its output.

    Checks the row counts of the heel results and derived results tables,
    then checks that every analysis id reported with an ERROR, WARNING or
    NOTIFICATION message belongs to the corresponding list of allowed ids.

    :param mock_hpo_bucket: mock whose return value is set to the bucket
        provided by ``self.get_mock_hpo_bucket()``
    """
    # Long-running test
    mock_hpo_bucket.return_value = self.get_mock_hpo_bucket()

    # create randomized tables to bypass BQ rate limits
    randomized_hpo_id = '_'.join([FAKE_HPO_ID, str(randint(10000, 99999))])

    # prepare
    self._load_dataset(randomized_hpo_id)
    test_util.populate_achilles(hpo_id=randomized_hpo_id,
                                include_heel=False)

    # define tables
    heel_results_table = '_'.join(
        [randomized_hpo_id, achilles_heel.ACHILLES_HEEL_RESULTS])
    results_derived_table = '_'.join(
        [randomized_hpo_id, achilles_heel.ACHILLES_RESULTS_DERIVED])

    def fetch_rows(sql):
        # Qualify the statement, run it and convert the response to rows
        response = bq_utils.query(sql_wrangle.qualify_tables(sql))
        return bq_utils.response2rows(response)

    def count_rows(table_id):
        # Number of rows reported by COUNT(1) on the given table
        rows = fetch_rows('SELECT COUNT(1) as num_rows FROM %s' % table_id)
        return rows[0]['num_rows']

    def assert_ids_allowed(sql_template, allowed_ids):
        # Every analysis_id returned for the heel results table must be
        # one of the allowed ids
        rows = fetch_rows(sql_template.format(table_id=heel_results_table))
        reported_ids = [row["analysis_id"] for row in rows]
        for analysis_id in reported_ids:
            self.assertIn(analysis_id, allowed_ids)

    # run achilles heel
    achilles_heel.create_tables(randomized_hpo_id, True)
    achilles_heel.run_heel(hpo_id=randomized_hpo_id)

    # validate
    heel_row_count = count_rows(heel_results_table)
    self.assertEqual(ACHILLES_HEEL_RESULTS_COUNT, heel_row_count)
    derived_row_count = count_rows(results_derived_table)
    self.assertEqual(ACHILLES_RESULTS_DERIVED_COUNT, derived_row_count)

    # test new heel re-categorization
    errors = [
        2, 4, 5, 101, 200, 206, 207, 209, 400, 405, 406, 409, 411, 413,
        500, 505, 506, 509, 600, 605, 606, 609, 613, 700, 705, 706, 709,
        711, 713, 715, 716, 717, 800, 805, 806, 809, 813, 814, 906, 1006,
        1609, 1805
    ]
    assert_ids_allowed(
        "SELECT analysis_id FROM {table_id} "
        "WHERE achilles_heel_warning LIKE 'ERROR:%' "
        "GROUP BY analysis_id", errors)

    warnings = [
        4, 5, 7, 8, 9, 200, 210, 302, 400, 402, 412, 420, 500, 511, 512,
        513, 514, 515, 602, 612, 620, 702, 712, 720, 802, 812, 820
    ]
    assert_ids_allowed(
        "SELECT analysis_id FROM {table_id} "
        "WHERE achilles_heel_warning LIKE 'WARNING:%' "
        "GROUP BY analysis_id", warnings)

    notifications = [
        101, 103, 105, 114, 115, 118, 208, 301, 410, 610, 710, 810, 900,
        907, 1000, 1800, 1807
    ]
    assert_ids_allowed(
        "SELECT analysis_id FROM {table_id} "
        "WHERE achilles_heel_warning LIKE 'NOTIFICATION:%' "
        "and analysis_id is not null "
        "GROUP BY analysis_id", notifications)
def _get_run_analysis_commands(hpo_id):
    """Return the Achilles analysis commands qualified for ``hpo_id``.

    The commands are read with ``sql_wrangle.get_commands`` from
    ``ACHILLES_DML_SQL_PATH`` and each one is passed through
    ``sql_wrangle.qualify_tables``.

    :param hpo_id: identifier forwarded to ``sql_wrangle.qualify_tables``
    :return: list of qualified commands, in the order they were read
    """
    qualified_commands = []
    for raw_sql in sql_wrangle.get_commands(ACHILLES_DML_SQL_PATH):
        qualified_commands.append(sql_wrangle.qualify_tables(raw_sql, hpo_id))
    return qualified_commands