def test_get_map_to_standard_concept_id(self):
    """Check QueryCOHD.get_map_to_standard_concept_id for valid and invalid codes.

    Covers an explicit source vocabulary, the default vocabulary, an unknown
    concept code and a concept code passed with the wrong type.
    """
    expected_mapping = {
        "source_concept_code": "715.3",
        "source_concept_id": 44834979,
        "source_concept_name": "Osteoarthrosis, localized, not specified whether primary or secondary",
        "source_vocabulary_id": "ICD9CM",
        "standard_concept_id": 72990,
        "standard_concept_name": "Localized osteoarthrosis uncertain if primary OR secondary",
        "standard_domain_id": "Condition",
    }

    # explicit vocabulary
    result = QueryCOHD.get_map_to_standard_concept_id("715.3", "ICD9CM")
    self.assertIsNotNone(result)
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0], expected_mapping)

    # default vocabulary
    result = QueryCOHD.get_map_to_standard_concept_id("715.3")
    self.assertIsNotNone(result)
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0], expected_mapping)

    # invalid concept_code id
    # (exercise the function under test, not its inverse get_map_from_standard_concept_id)
    result = QueryCOHD.get_map_to_standard_concept_id("725.3")
    self.assertEqual(result, [])

    # invalid concept_code format
    result = QueryCOHD.get_map_to_standard_concept_id(725.3)
    self.assertEqual(result, [])
def test_get_xref_to_OMOP(self):
    """Check QueryCOHD.get_xref_to_OMOP for valid and invalid arguments.

    Covers an explicit distance, the default distance, an unknown CURIE and
    a distance passed with the wrong type.
    """
    expected_first = {
        'intermediate_oxo_id': 'ICD9CM:715.3',
        'intermediate_oxo_label': '',
        'omop_concept_name': 'Localized osteoarthrosis uncertain if primary OR secondary',
        'omop_distance': 1,
        'omop_domain_id': 'Condition',
        'omop_standard_concept_id': 72990,
        'oxo_distance': 1,
        'source_oxo_id': 'DOID:8398',
        'source_oxo_label': 'osteoarthritis',
        'total_distance': 2,
    }

    # explicit distance
    result = QueryCOHD.get_xref_to_OMOP("DOID:8398", 2)
    self.assertIsNotNone(result)
    self.assertEqual(len(result), 3)
    self.assertEqual(result[0], expected_first)

    # default distance (checked once; a repeated identical query adds nothing)
    result = QueryCOHD.get_xref_to_OMOP("DOID:8398")
    self.assertIsNotNone(result)
    self.assertEqual(len(result), 3)
    self.assertEqual(result[0], expected_first)

    # invalid curie id
    result = QueryCOHD.get_xref_to_OMOP("DOID:83981", 2)
    self.assertEqual(result, [])

    # invalid distance format
    result = QueryCOHD.get_xref_to_OMOP("DOID:8398", "2")
    self.assertEqual(result, [])
def test_get_map_from_standard_concept_id(self):
    """Check QueryCOHD.get_map_from_standard_concept_id for valid and invalid input.

    Each valid case is (call arguments, expected number of rows, expected first row).
    """
    icd9_first = {
        "concept_class_id": "4-dig nonbill code",
        "concept_code": "715.3",
        "concept_id": 44834979,
        "concept_name": "Osteoarthrosis, localized, not specified whether primary or secondary",
        "domain_id": "Condition",
        "standard_concept": None,
        "vocabulary_id": "ICD9CM",
    }
    ciel_first = {
        "concept_class_id": "Diagnosis",
        "concept_code": "116253",
        "concept_id": 45930832,
        "concept_name": "Localized Osteoarthrosis Uncertain If Primary or Secondary",
        "domain_id": "Condition",
        "standard_concept": None,
        "vocabulary_id": "CIEL",
    }
    valid_cases = (
        (("72990", "ICD9CM"), 10, icd9_first),  # explicit vocabulary
        (("72990",), 12, ciel_first),           # default vocabulary
    )
    for call_args, row_count, first_row in valid_cases:
        rows = QueryCOHD.get_map_from_standard_concept_id(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(len(rows), row_count)
        self.assertEqual(rows[0], first_row)

    # invalid concept_id id, then invalid concept_id format
    for bad_args in (("DOID:839812", 2), (8398, 2)):
        self.assertEqual(QueryCOHD.get_map_from_standard_concept_id(*bad_args), [])
def test_get_domain_pair_counts(self):
    """Check QueryCOHD.get_domain_pair_counts with explicit, default and bad dataset ids."""
    top_pair = {
        'count': 1933917,
        'dataset_id': 1,
        'domain_id_1': 'Condition',
        'domain_id_2': 'Condition',
    }

    # explicit dataset_id first, then the default dataset_id
    for call_args in ((1,), ()):
        pairs = QueryCOHD.get_domain_pair_counts(*call_args)
        self.assertIsNotNone(pairs)
        self.assertEqual(len(pairs), 50)
        self.assertEqual(pairs[0], top_pair)

    # invalid dataset_id value, then invalid dataset_id type
    for bad_dataset_id in (-1, '1'):
        self.assertEqual(QueryCOHD.get_domain_pair_counts(bad_dataset_id), [])
def test_get_source_to_target_has_result(self):
    """Check that get_source_to_target returns something other than None."""
    cohd = QueryCOHD()
    self.assertIsNotNone(cohd.get_source_to_target(312327, 313217))
def test_get_source_to_target_result(self):
    """Compare get_source_to_target output with the stored JSON fixture."""
    cohd = QueryCOHD()

    # the expected answer lives in a fixture file next to the tests
    with open("./tests/NewQueryCOHDTestsData.json") as fixture:
        expected = json.load(fixture)

    actual = cohd.get_source_to_target(312327, 313217)
    self.assertListEqual(actual, expected)
def get_conditions_treating(drug_description, conservative=False):
    """
    Get all the conditions that are associated with a drug.
    :param drug_description: string (eg. 'Naproxen')
    :param conservative: bool (True= use exact matching for mapping drug to COHD, False = use all synonyms returned by COHD)
    :return: dictionary of dictionaries (eg. keys are associated concept IDs, values look like:
    {'associated_concept_id': 134736,
    'associated_concept_name': 'Backache',
    'concept_count': 112,
    'concept_frequency': 2.101665438505926e-05,
    'concept_id': 1115008}
    where concept_count is summed over every matched drug concept and
    concept_frequency is that count divided by the total count over all
    returned conditions (0.0 when the total is zero)
    """
    # Get the concept ID(s) of the drug
    drug_concepts = QueryCOHD.find_concept_ids(drug_description)
    if conservative:
        wanted_name = drug_description.lower()
        drug_ids = [concept['concept_id'] for concept in drug_concepts
                    if concept['concept_name'].lower() == wanted_name]
    else:
        drug_ids = [concept['concept_id'] for concept in drug_concepts]

    # get all the associated conditions
    associated_concepts = []
    for drug_id in drug_ids:
        associated_concepts += QueryCOHD.get_associated_concept_domain_freq(str(drug_id), "Condition")

    # go through the associated conditions, summing up the concept counts
    result_dict = dict()
    for associated_concept in associated_concepts:
        condition_id = associated_concept['associated_concept_id']
        if condition_id in result_dict:
            result_dict[condition_id]['concept_count'] += associated_concept['concept_count']
        else:
            # copy, so the accumulation below never modifies the dicts handed back by QueryCOHD
            result_dict[condition_id] = dict(associated_concept)

    # We'll need to adjust the frequencies in terms of the total patients treated with this drug
    total_associated_condition_counts = sum(entry['concept_count'] for entry in result_dict.values())
    for entry in result_dict.values():
        if total_associated_condition_counts:
            entry['concept_frequency'] = entry['concept_count'] / float(total_associated_condition_counts)
        else:
            # every count is zero: report a zero frequency instead of dividing by zero
            entry['concept_frequency'] = 0.0
    return result_dict
def test_get_source_to_target_input(self):
    """Check that get_source_to_target returns [] when either id is passed as a string."""
    cohd = QueryCOHD()

    # invalid parameter type in the first position, then in the second
    for first_id, second_id in (('312327', 313217), (312327, '313217')):
        outcome = cohd.get_source_to_target(first_id, second_id)
        self.assertEqual(outcome, [])
def test_get_xref_from_OMOP(self):
    """Check QueryCOHD.get_xref_from_OMOP for valid and invalid arguments."""
    expected_first = {
        "intermediate_omop_concept_code": "92546004",
        "intermediate_omop_concept_id": 192855,
        "intermediate_omop_concept_name": "Cancer in situ of urinary bladder",
        "intermediate_omop_vocabulary_id": "SNOMED",
        "intermediate_oxo_curie": "SNOMEDCT:92546004",
        "intermediate_oxo_label": "Cancer in situ of urinary bladder",
        "omop_distance": 0,
        "oxo_distance": 1,
        "source_omop_concept_code": "92546004",
        "source_omop_concept_id": 192855,
        "source_omop_concept_name": "Cancer in situ of urinary bladder",
        "source_omop_vocabulary_id": "SNOMED",
        "target_curie": "UMLS:C0154091",
        "target_label": "Cancer in situ of urinary bladder",
        "total_distance": 1,
    }

    # explicit distance first, then the default distance
    for call_args in (("192855", "UMLS", 2), ("192855", "UMLS")):
        xrefs = QueryCOHD.get_xref_from_OMOP(*call_args)
        self.assertIsNotNone(xrefs)
        self.assertEqual(len(xrefs), 6)
        self.assertEqual(xrefs[0], expected_first)

    # invalid concept id, invalid mapping_targets, invalid distance format
    # NOTE(review): the last two cases also pass the unknown concept id "1928551",
    # so they may not isolate the argument they are named after - confirm intent.
    invalid_calls = (
        ("1928551", "UMLS", 2),
        ("1928551", "UMS", 2),
        ("1928551", "UMLS", "2"),
    )
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_xref_from_OMOP(*bad_args), [])
def test_find_concept_ids(self):
    """Check QueryCOHD.find_concept_ids for default, invalid and large-result queries."""
    # Disabled case kept for reference:
    # result = QueryCOHD.find_concept_ids("cancer", "Condition", dataset_id=1, min_count=0)
    # self.assertIsNotNone(result)
    # self.assertEqual(len(result), 84)
    # self.assertEqual(result[0], {'concept_class_id': 'Clinical Finding',
    #                              'concept_code': '92546004',
    #                              'concept_count': 368.0,
    #                              'concept_id': 192855,
    #                              'concept_name': 'Cancer in situ of urinary bladder',
    #                              'domain_id': 'Condition',
    #                              'vocabulary_id': 'SNOMED'})

    # default dataset_id and min_count
    condition_hits = QueryCOHD.find_concept_ids("cancer", "Condition")
    self.assertIsNotNone(condition_hits)
    self.assertEqual(len(condition_hits), 3)
    self.assertEqual(condition_hits[0], {
        'concept_class_id': 'Clinical Finding',
        'concept_code': '92546004',
        'concept_count': 368.0,
        'concept_id': 192855,
        'concept_name': 'Cancer in situ of urinary bladder',
        'domain_id': 'Condition',
        'vocabulary_id': 'SNOMED',
    })

    # default dataset_id and domain
    any_domain_hits = QueryCOHD.find_concept_ids("cancer")
    self.assertIsNotNone(any_domain_hits)
    self.assertEqual(len(any_domain_hits), 37)
    self.assertEqual(any_domain_hits[0], {
        'concept_class_id': 'Procedure',
        'concept_code': '15886004',
        'concept_count': 4195.0,
        'concept_id': 4048727,
        'concept_name': 'Screening for cancer',
        'domain_id': 'Procedure',
        'vocabulary_id': 'SNOMED',
    })

    # invalid name value, then invalid domain value
    for bad_args in (("cancer1", "Condition"), ("cancer", "Conditi")):
        self.assertEqual(QueryCOHD.find_concept_ids(*bad_args), [])

    # timeout case (backend timeout issue has been fixed)
    drug_hits = QueryCOHD.find_concept_ids("ibuprofen", "Drug", dataset_id=1, min_count=0)
    self.assertIsNotNone(drug_hits)
    self.assertEqual(len(drug_hits), 1000)
    self.assertEqual(drug_hits[0], {
        'concept_class_id': 'Clinical Drug',
        'concept_code': '197806',
        'concept_count': 115101,
        'concept_id': 19019073,
        'concept_name': 'Ibuprofen 600 MG Oral Tablet',
        'domain_id': 'Drug',
        'vocabulary_id': 'RxNorm',
    })
def test_get_individual_concept_freq(self):
    """Check QueryCOHD.get_individual_concept_freq; bad input is expected to give None."""
    freq = QueryCOHD.get_individual_concept_freq("2008271")
    self.assertIsNotNone(freq)
    self.assertEqual(freq['concept_frequency'], 0.0003831786451275983)
    self.assertEqual(freq['concept_count'], 2042)

    # wrong ID, then wrong parameter format
    for bad_id in ("0", 2008271):
        self.assertIsNone(QueryCOHD.get_individual_concept_freq(bad_id))
def test_get_paired_concept_freq(self):
    """Check QueryCOHD.get_paired_concept_freq; bad input is expected to give None."""
    pair_freq = QueryCOHD.get_paired_concept_freq("2008271", "192855")
    self.assertIsNotNone(pair_freq)
    self.assertEqual(pair_freq['concept_frequency'], 0.000005066514896398214)
    self.assertEqual(pair_freq['concept_count'], 27)

    # wrong IDs, then wrong parameter format
    for bad_pair in (("2008271", "2008271"), (2008271, 192855)):
        self.assertIsNone(QueryCOHD.get_paired_concept_freq(*bad_pair))
def test_find_concept_ids(self):
    """Check the ids and names QueryCOHD.find_concept_ids returns for 'cancer'."""
    expected_hits = (
        ('192855', 'Cancer in situ of urinary bladder'),
        ('2008271', 'Injection or infusion of cancer chemotherapeutic substance'),
    )
    hits = QueryCOHD.find_concept_ids("cancer")
    self.assertIsNotNone(hits)
    self.assertEqual(len(hits), 2)
    for position, (concept_id, concept_name) in enumerate(expected_hits):
        self.assertEqual(hits[position]['concept_id'], concept_id)
        self.assertEqual(hits[position]['concept_name'], concept_name)

    # wrong label
    no_hits = QueryCOHD.find_concept_ids("cancers")
    self.assertIsNotNone(no_hits)
    self.assertEqual(len(no_hits), 0)
def test_get_patient_count(self):
    """Check QueryCOHD.get_patient_count with explicit, default and bad dataset ids."""
    # explicit dataset_id first, then the default dataset_id
    valid_cases = (
        ((2,), {'count': 5364781.0, 'dataset_id': 2}),
        ((), {'count': 1790431.0, 'dataset_id': 1}),
    )
    for call_args, expected in valid_cases:
        patient_count = QueryCOHD.get_patient_count(*call_args)
        self.assertIsNotNone(patient_count)
        self.assertEqual(patient_count, expected)

    # invalid dataset_id value, then invalid dataset_id type
    for bad_dataset_id in (-1, '1'):
        self.assertEqual(QueryCOHD.get_patient_count(bad_dataset_id), {})
def test_get_paired_concept_freq(self):
    """Check QueryCOHD.get_paired_concept_freq; bad input is expected to give {}."""
    # explicit dataset_id first, then the default dataset_id
    for call_args in (("2008271", "192855", 1), ("2008271", "192855")):
        pair_freq = QueryCOHD.get_paired_concept_freq(*call_args)
        self.assertIsNotNone(pair_freq)
        self.assertEqual(pair_freq['concept_frequency'], 0.000005585247351056813)
        self.assertEqual(pair_freq['concept_count'], 10)

    # invalid ID value, then invalid parameter type
    for bad_pair in (("2008271", "2008271"), (2008271, 192855)):
        self.assertEqual(QueryCOHD.get_paired_concept_freq(*bad_pair), {})
def test_get_individual_concept_freq(self):
    """Check QueryCOHD.get_individual_concept_freq; bad input is expected to give {}."""
    # explicit dataset id first, then the default dataset id
    for call_args in (("192855", 1), ("192855",)):
        freq = QueryCOHD.get_individual_concept_freq(*call_args)
        self.assertIsNotNone(freq)
        self.assertEqual(freq['concept_frequency'], 0.0002055371025188907)
        self.assertEqual(freq['concept_count'], 368)

    # invalid ID value, then invalid concept_id type
    for bad_args in (("0", 1), (2008271, 1)):
        self.assertEqual(QueryCOHD.get_individual_concept_freq(*bad_args), {})
def test_get_concepts(self):
    """Check QueryCOHD.get_concepts for two ids, one id and a wrongly typed id."""
    bladder_cancer = {
        'concept_class_id': 'Clinical Finding',
        'concept_code': '92546004',
        'concept_id': 192855,
        'concept_name': 'Cancer in situ of urinary bladder',
        'domain_id': 'Condition',
        'vocabulary_id': 'SNOMED',
    }
    chemo_injection = {
        'concept_class_id': '4-dig billing code',
        'concept_code': '99.25',
        'concept_id': 2008271,
        'concept_name': 'Injection or infusion of cancer chemotherapeutic substance',
        'domain_id': 'Procedure',
        'vocabulary_id': 'ICD9Proc',
    }
    valid_cases = (
        (["192855", "2008271"], [bladder_cancer, chemo_injection]),
        (["192855"], [bladder_cancer]),
    )
    for concept_ids, expected in valid_cases:
        concepts = QueryCOHD.get_concepts(concept_ids)
        self.assertIsNotNone(concepts)
        self.assertEqual(len(concepts), len(expected))
        self.assertEqual(concepts, expected)

    # invalid concept_id type
    self.assertEqual(QueryCOHD.get_concepts(["192855", 2008271]), [])
def test_get_domain_counts(self):
    """Check QueryCOHD.get_domain_counts with explicit, default and bad dataset ids."""
    first_domain = {'count': 10159, 'dataset_id': 1, 'domain_id': 'Condition'}

    # explicit dataset_id first, then the default dataset_id
    for call_args in ((1,), ()):
        domain_counts = QueryCOHD.get_domain_counts(*call_args)
        self.assertIsNotNone(domain_counts)
        self.assertEqual(len(domain_counts), 10)
        self.assertEqual(domain_counts[0], first_domain)

    # invalid dataset_id value, then invalid dataset_id type
    for bad_dataset_id in (-1, "1"):
        self.assertEqual(QueryCOHD.get_domain_counts(bad_dataset_id), [])
def test_get_associated_concept_domain_freq(self):
    """Check QueryCOHD.get_associated_concept_domain_freq for valid and invalid input.

    Each valid case is (call arguments, expected first frequency, expected row count).
    """
    valid_cases = (
        (('192855', 'Procedure', 2), 0.0002508956097182718, 655),
        (('192855', 'Procedure'), 0.00016867447000191573, 159),  # default dataset_id
    )
    for call_args, first_frequency, row_count in valid_cases:
        rows = QueryCOHD.get_associated_concept_domain_freq(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(rows[0]['concept_frequency'], first_frequency)
        self.assertEqual(len(rows), row_count)

    # invalid concept ID value, invalid domain value, invalid concept type
    invalid_calls = (("0", "drug"), ("192855", "dru"), (192855, "drug"))
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_associated_concept_domain_freq(*bad_args), [])
def test_get_datasets(self):
    """Check that QueryCOHD.get_datasets returns the three expected dataset records."""
    # (dataset_id, dataset_name, dataset_description)
    dataset_rows = (
        (1, "5-year non-hierarchical",
         "Clinical data from 2013-2017. Each concept's count reflects the use of that specific concept."),
        (2, "Lifetime non-hierarchical",
         "Clinical data from all years in the database. Each concept's count reflects the use of that specific concept."),
        (3, "5-year hierarchical",
         "Clinical data from 2013-2017. Each concept's count includes use of that concept and descendant concepts."),
    )
    expected = [
        {'dataset_description': description, 'dataset_id': dataset_id, 'dataset_name': title}
        for dataset_id, title, description in dataset_rows
    ]

    datasets = QueryCOHD.get_datasets()
    self.assertIsNotNone(datasets)
    self.assertEqual(len(datasets), 3)
    self.assertEqual(datasets, expected)
def test_get_associated_concept_freq(self):
    """Check QueryCOHD.get_associated_concept_freq for valid and invalid input.

    Each valid case is (call arguments, expected number of rows, expected first row).
    """
    lifetime_first = {
        'associated_concept_id': 197508,
        'associated_concept_name': 'Malignant tumor of urinary bladder',
        'associated_domain_id': 'Condition',
        'concept_count': 1477,
        'concept_frequency': 0.0002753141274545969,
        'concept_id': 192855,
        'dataset_id': 2,
    }
    default_first = {
        'associated_concept_id': 2213216,
        'associated_concept_name': 'Cytopathology, selective cellular enhancement technique with interpretation (eg, liquid based slide preparation method), except cervical or vaginal',
        'associated_domain_id': 'Measurement',
        'concept_count': 330,
        'concept_frequency': 0.0001843131625848748,
        'concept_id': 192855,
        'dataset_id': 1,
    }
    valid_cases = (
        (("192855", 2), 2735, lifetime_first),
        (("192855",), 768, default_first),  # default dataset_id
    )
    for call_args, row_count, first_row in valid_cases:
        rows = QueryCOHD.get_associated_concept_freq(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(len(rows), row_count)
        self.assertEqual(rows[0], first_row)

    # invalid concept_id value, invalid dataset_id value,
    # invalid concept format, invalid dataset_id format
    invalid_calls = (("1928551",), ("192855", 10), (192855,), ("192855", "1"))
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_associated_concept_freq(*bad_args), [])
def test_get_obs_exp_ratio(self):
    """Check QueryCOHD.get_obs_exp_ratio across argument combinations.

    Three groups are exercised in order: concept pairs (exactly one row is
    compared in full), single-concept queries (only the row count is
    checked) and wrongly typed arguments (an empty list is expected).
    """
    five_year = [{'concept_id_1': 192855, 'concept_id_2': 2008271, 'dataset_id': 1,
                  'expected_count': 0.3070724311632227, 'ln_ratio': 3.483256720088832,
                  'observed_count': 10}]
    lifetime = [{'concept_id_1': 192855, 'concept_id_2': 2008271, 'dataset_id': 2,
                 'expected_count': 5.171830872499735, 'ln_ratio': 3.634887899455015,
                 'observed_count': 196}]

    pair_cases = (
        (("192855", "2008271", "Procedure"), five_year),    # default dataset_id
        (("192855", "2008271", "Procedure", 2), lifetime),  # dataset_id == 2
        (("192855", "2008271"), five_year),                 # default domain
        (("192855", "2008271", "", 2), lifetime),           # default domain, dataset_id == 2
    )
    for call_args, expected in pair_cases:
        ratios = QueryCOHD.get_obs_exp_ratio(*call_args)
        self.assertIsNotNone(ratios)
        self.assertEqual(len(ratios), 1)
        self.assertEqual(ratios, expected)

    count_cases = (
        (("192855",), 768),                     # default concept_id_2, domain and dataset_id
        (("192855", "", "", 2), 2735),          # default concept_id_2 and domain, dataset_id == 2
        (("192855", "", "Procedure"), 159),     # default concept_id_2 and dataset_id
        (("192855", "", "Procedure", 2), 655),  # default concept_id_2
    )
    for call_args, row_count in count_cases:
        ratios = QueryCOHD.get_obs_exp_ratio(*call_args)
        self.assertIsNotNone(ratios)
        self.assertEqual(len(ratios), row_count)

    # invalid concept_id_1 type, invalid concept_id_2 type, invalid dataset_id type
    invalid_calls = (
        (192855, "2008271", "", 2),
        ("192855", 2008271, "", 2),
        ("192855", "2008271", "", "2"),
    )
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_obs_exp_ratio(*bad_args), [])
def test_get_vocabularies(self):
    """Check the size and the first two ids of QueryCOHD.get_vocabularies."""
    vocabularies = QueryCOHD.get_vocabularies()
    self.assertIsNotNone(vocabularies)
    self.assertEqual(len(vocabularies), 73)
    for position, vocabulary_id in enumerate(('ABMS', 'AMT')):
        self.assertEqual(vocabularies[position]['vocabulary_id'], vocabulary_id)
def test_get_chi_square(self):
    """Check QueryCOHD.get_chi_square across argument combinations.

    Three groups are exercised in order: concept pairs (exactly one row is
    compared in full), single-concept queries (only the row count is
    checked) and invalid arguments (an empty list is expected).
    """
    five_year = [{'chi_square': 306.2816108187519, 'concept_id_1': 192855,
                  'concept_id_2': 2008271, 'dataset_id': 1,
                  'p-value': 1.4101531778039801e-68}]
    lifetime = [{'chi_square': 7065.7865572100745, 'concept_id_1': 192855,
                 'concept_id_2': 2008271, 'dataset_id': 2,
                 'p-value': 0.0}]

    pair_cases = (
        (("192855", "2008271", "Condition"), five_year),    # default dataset_id
        (("192855", "2008271", "Condition", 2), lifetime),  # dataset_id == 2
        (("192855", "2008271"), five_year),                 # default domain and dataset_id
    )
    for call_args, expected in pair_cases:
        rows = QueryCOHD.get_chi_square(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows, expected)

    count_cases = (
        (("192855",), 768),                     # no concept_id_2, default domain and dataset_id
        (("192855", "", "Condition"), 226),     # no concept_id_2, default dataset_id
        (("192855", "", "Condition", 2), 991),  # no concept_id_2, dataset_id == 2
        (("192855", "", "", 2), 2735),          # no concept_id_2, dataset_id == 2, default domain
    )
    for call_args, row_count in count_cases:
        rows = QueryCOHD.get_chi_square(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(len(rows), row_count)

    # invalid concept_id_1 type, invalid concept_id_2 type, invalid dataset_id value
    invalid_calls = (
        (192855, "", "", 1),
        ("192855", 2008271, "", 1),
        ("192855", "2008271", "condition", 10),
    )
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_chi_square(*bad_args), [])
def make_edge_attribute_from_curies(self, source_curie, target_curie, source_name="", target_name="", default=0., name=""):
    """
    Generic function to make an edge attribute
    :source_curie: CURIE of the source node for the edge under consideration
    :target_curie: CURIE of the target node for the edge under consideration
    :source_name: text name of the source node (in case the KP doesn't understand the CURIE)
    :target_name: text name of the target node (in case the KP doesn't understand the CURIE)
    :default: default value of the edge attribute
    :name: name of the KP functionality you want to apply
        ('paired_concept_frequency', 'observed_expected_ratio' or 'chi_square';
        any other name leaves the value at default)
    :return: an EdgeAttribute (value stored as a string) when COHD is the selected
        knowledge provider; None when it is not, or when an error occurred and
        was reported through self.response
    """
    # Bound before the try block so the error message in the handler can always reference it,
    # even when the failure happens before a knowledge provider has been chosen.
    KP_to_use = None
    try:
        # edge attributes
        attribute_type = "data:0951"
        url = "http://cohd.smart-api.info/"
        value = default

        node_curie_to_type = self.node_curie_to_type
        source_type = node_curie_to_type[source_curie]
        target_type = node_curie_to_type[target_curie]

        # figure out which knowledge provider to use
        # TODO: should handle this in a more structured fashion, does there exist a standardized KP API format?
        for KP in self.who_knows_about_what:
            # see which KP's can label both sources of information
            known_types = self.who_knows_about_what[KP]
            if self.in_common(source_type, known_types) and self.in_common(target_type, known_types):
                KP_to_use = KP

        if KP_to_use != 'COHD':
            return None

        # convert CURIE to OMOP identifiers
        source_OMOPs = [str(x['omop_standard_concept_id']) for x in COHD.get_xref_to_OMOP(source_curie, 1)]
        target_OMOPs = [str(x['omop_standard_concept_id']) for x in COHD.get_xref_to_OMOP(target_curie, 1)]

        # FIXME: Super hacky way to get around the fact that COHD can't map CHEMBL drugs
        if source_curie.split('.')[0] == 'CHEMBL':
            source_OMOPs = [str(x['concept_id']) for x in
                            COHD.find_concept_ids(source_name, domain="Drug", dataset_id=3)]
        if target_curie.split('.')[0] == 'CHEMBL':
            target_OMOPs = [str(x['concept_id']) for x in
                            COHD.find_concept_ids(target_name, domain="Drug", dataset_id=3)]

        # uniquify everything
        source_OMOPs = list(set(source_OMOPs))
        target_OMOPs = list(set(target_OMOPs))

        # Decide how to handle the response from the KP
        if name == 'paired_concept_frequency':
            # sum up all frequencies
            # TODO check with COHD people to see if this is kosher
            frequency = default
            for (omop1, omop2) in itertools.product(source_OMOPs, target_OMOPs):
                freq_data = COHD.get_paired_concept_freq(omop1, omop2, 3)  # use the hierarchical dataset
                if freq_data and 'concept_frequency' in freq_data:
                    frequency += freq_data['concept_frequency']
            # decorate the edges
            value = frequency
        elif name == 'observed_expected_ratio':
            # should probably take the largest obs/exp ratio
            # TODO: check with COHD people to see if this is kosher
            # FIXME: the ln_ratio can be negative, so I should probably account for this, but the object model doesn't like -np.inf
            value = float("-inf")
            for (omop1, omop2) in itertools.product(source_OMOPs, target_OMOPs):
                response = COHD.get_obs_exp_ratio(omop1, concept_id_2=omop2, domain="", dataset_id=3)  # use the hierarchical dataset
                # response is a list, since this function is overloaded and can omit concept_id_2, take the first element
                if response and 'ln_ratio' in response[0]:
                    temp_val = response[0]['ln_ratio']
                    if temp_val > value:
                        value = temp_val
        elif name == 'chi_square':
            value = float("inf")
            for (omop1, omop2) in itertools.product(source_OMOPs, target_OMOPs):
                response = COHD.get_chi_square(omop1, concept_id_2=omop2, domain="", dataset_id=3)  # use the hierarchical dataset
                # response is a list, since this function is overloaded and can omit concept_id_2, take the first element
                if response and 'p-value' in response[0]:
                    temp_val = response[0]['p-value']
                    if temp_val < value:  # looking at p-values, so lower is better
                        value = temp_val

        # create and populate the edge attribute
        # FIXME: unclear in object model if attribute type dictates value type, or if value always needs to be a string
        edge_attribute = EdgeAttribute(type=attribute_type, name=name, value=str(value), url=url)
        return edge_attribute
    except Exception:
        tb = traceback.format_exc()
        error_type, error, _ = sys.exc_info()
        self.response.error(tb, error_code=error_type.__name__)
        self.response.error(f"Something went wrong when adding the edge attribute from {KP_to_use}.")
def test_get_relative_frequency(self):
    """Check QueryCOHD.get_relative_frequency across argument combinations.

    Three groups are exercised in order: concept pairs (exactly one row is
    compared in full), single-concept queries (only the row count is
    checked) and wrongly typed arguments (an empty list is expected).
    """
    five_year = [{'concept_2_count': 1494, 'concept_id_1': 192855, 'concept_id_2': 2008271,
                  'concept_pair_count': 10, 'dataset_id': 1,
                  'relative_frequency': 0.006693440428380187}]
    lifetime = [{'concept_2_count': 17127, 'concept_id_1': 192855, 'concept_id_2': 2008271,
                 'concept_pair_count': 196, 'dataset_id': 2,
                 'relative_frequency': 0.011443918958369825}]

    pair_cases = (
        (("192855", "2008271", "Procedure"), five_year),    # default dataset_id
        (("192855", "2008271", "Procedure", 2), lifetime),  # dataset_id == 2
        (("192855", "2008271"), five_year),                 # default domain
        (("192855", "2008271", "", 2), lifetime),           # default domain, dataset_id == 2
    )
    for call_args, expected in pair_cases:
        rows = QueryCOHD.get_relative_frequency(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows, expected)

    count_cases = (
        (("192855",), 768),                     # default concept_id_2, domain and dataset_id
        (("192855", "", "", 2), 2735),          # default concept_id_2 and domain, dataset_id == 2
        (("192855", "", "Procedure"), 159),     # default concept_id_2 and dataset_id
        (("192855", "", "Procedure", 2), 655),  # default concept_id_2
    )
    for call_args, row_count in count_cases:
        rows = QueryCOHD.get_relative_frequency(*call_args)
        self.assertIsNotNone(rows)
        self.assertEqual(len(rows), row_count)

    # invalid concept_id_1 type, invalid concept_id_2 type, invalid dataset_id type
    invalid_calls = (
        (192855, "2008271", "", 2),
        ("192855", 2008271, "", 2),
        ("192855", "2008271", "", "2"),
    )
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_relative_frequency(*bad_args), [])
def test_get_most_frequent_concepts(self):
    """Check QueryCOHD.get_most_frequent_concepts for valid and invalid arguments.

    Each valid case is (call arguments, expected first row); ten rows are
    expected every time.
    """
    unknown_observation = {
        'concept_class_id': 'Undefined',
        'concept_count': 1189172,
        'concept_frequency': 0.6641819762950932,
        'concept_id': 44814653,
        'concept_name': 'Unknown',
        'dataset_id': 1,
        'domain_id': 'Observation',
        'vocabulary_id': 'PCORNet',
    }
    hypertension_five_year = {
        'concept_class_id': 'Clinical Finding',
        'concept_count': 233790,
        'concept_frequency': 0.1305774978203572,
        'concept_id': 320128,
        'concept_name': 'Essential hypertension',
        'dataset_id': 1,
        'domain_id': 'Condition',
        'vocabulary_id': 'SNOMED',
    }
    hypertension_lifetime = {
        'concept_class_id': 'Clinical Finding',
        'concept_count': 459776,
        'concept_frequency': 0.08570265962394365,
        'concept_id': 320128,
        'concept_name': 'Essential hypertension',
        'dataset_id': 2,
        'domain_id': 'Condition',
        'vocabulary_id': 'SNOMED',
    }
    valid_cases = (
        ((10,), unknown_observation),                   # default domain and dataset_id
        ((10, "Condition"), hypertension_five_year),    # default dataset_id
        ((10, "Condition", 2), hypertension_lifetime),  # no default value
    )
    for call_args, first_row in valid_cases:
        concepts = QueryCOHD.get_most_frequent_concepts(*call_args)
        self.assertIsNotNone(concepts)
        self.assertEqual(len(concepts), 10)
        self.assertEqual(concepts[0], first_row)

    invalid_calls = (
        (-10,),                  # invalid num value
        ("10",),                 # invalid num type
        (10, "Condition1"),      # invalid domain value
        (10, 1, 2),              # invalid domain type
        (10, "Condition", 10),   # invalid dataset_id value
        (10, "Condition", "2"),  # invalid dataset_id type
    )
    for bad_args in invalid_calls:
        self.assertEqual(QueryCOHD.get_most_frequent_concepts(*bad_args), [])
def test_get_concept_descendants(self):
    """Exercise QueryCOHD.get_concept_descendants for concept id '19019073'.

    The cases start with every argument given and drop one trailing
    argument at a time. Each call must return a non-None result of the
    expected length whose first entry equals the expected record.
    """
    branded_drug = {
        "concept_class_id": "Branded Drug",
        "concept_code": "206913",
        "concept_count": 14744,
        "concept_name": "Ibuprofen 600 MG Oral Tablet [Ibu]",
        "descendant_concept_id": 19033921,
        "domain_id": "Drug",
        "max_levels_of_separation": 0,
        "min_levels_of_separation": 0,
        "standard_concept": "S",
        "vocabulary_id": "RxNorm",
    }
    # Same record, but with the count expected when dataset_id is omitted.
    branded_drug_no_dataset = dict(branded_drug, concept_count=14853)
    clinical_drug = {
        "concept_class_id": "Clinical Drug",
        "concept_code": "197806",
        "concept_count": 121104,
        "concept_name": "Ibuprofen 600 MG Oral Tablet",
        "descendant_concept_id": 19019073,
        "domain_id": "Drug",
        "max_levels_of_separation": 0,
        "min_levels_of_separation": 0,
        "standard_concept": "S",
        "vocabulary_id": "RxNorm",
    }

    # (positional arguments, expected length, expected first record)
    cases = [
        (('19019073', 'RxNorm', 'Branded Drug', 1), 3, branded_drug),
        # default dataset_id
        (('19019073', 'RxNorm', 'Branded Drug'), 3, branded_drug_no_dataset),
        # default concept_class_id
        (('19019073', 'RxNorm'), 4, clinical_drug),
        # default vocabulary_id
        (('19019073',), 4, clinical_drug),
    ]
    for call_args, expected_len, expected_first in cases:
        descendants = QueryCOHD.get_concept_descendants(*call_args)
        self.assertIsNotNone(descendants)
        self.assertEqual(len(descendants), expected_len)
        self.assertEqual(descendants[0], expected_first)
def test_get_concept_ancestors(self):
    """Exercise QueryCOHD.get_concept_ancestors for concept id '19019073'.

    The cases start with every argument given and drop one trailing
    argument at a time. Each call must return a non-None result of the
    expected length whose first entry equals the expected record.
    """
    ingredient = {
        'ancestor_concept_id': 1177480,
        'concept_class_id': 'Ingredient',
        'concept_code': '5640',
        'concept_count': 174,
        'concept_name': 'Ibuprofen',
        'domain_id': 'Drug',
        'max_levels_of_separation': 2,
        'min_levels_of_separation': 2,
        'standard_concept': 'S',
        'vocabulary_id': 'RxNorm',
    }
    # Same record, but with the count expected when dataset_id is omitted.
    ingredient_no_dataset = dict(ingredient, concept_count=233514)
    clinical_drug = {
        "ancestor_concept_id": 19019073,
        "concept_class_id": "Clinical Drug",
        "concept_code": "197806",
        "concept_count": 121104,
        "concept_name": "Ibuprofen 600 MG Oral Tablet",
        "domain_id": "Drug",
        "max_levels_of_separation": 0,
        "min_levels_of_separation": 0,
        "standard_concept": "S",
        "vocabulary_id": "RxNorm",
    }

    # (positional arguments, expected length, expected first record)
    cases = [
        (('19019073', 'RxNorm', 'Ingredient', 1), 1, ingredient),
        # default dataset_id
        (('19019073', 'RxNorm', 'Ingredient'), 1, ingredient_no_dataset),
        # default concept_class_id
        (('19019073', 'RxNorm'), 4, clinical_drug),
        # default vocabulary_id
        (('19019073',), 8, clinical_drug),
    ]
    for call_args, expected_len, expected_first in cases:
        ancestors = QueryCOHD.get_concept_ancestors(*call_args)
        self.assertIsNotNone(ancestors)
        self.assertEqual(len(ancestors), expected_len)
        self.assertEqual(ancestors[0], expected_first)