Beispiel #1
0
    def test_get_map_to_standard_concept_id(self):
        """Check mapping of a source concept code to its standard concept.

        Covers an explicit vocabulary, the default vocabulary, and two invalid
        concept codes (an unknown value and a non-string value), for which an
        empty list is expected.
        """
        expected_mapping = {
            "source_concept_code": "715.3", "source_concept_id": 44834979,
            "source_concept_name": "Osteoarthrosis, localized, not specified whether primary or secondary",
            "source_vocabulary_id": "ICD9CM", "standard_concept_id": 72990,
            "standard_concept_name": "Localized osteoarthrosis uncertain if primary OR secondary",
            "standard_domain_id": "Condition",
        }

        #   explicit vocabulary
        result = QueryCOHD.get_map_to_standard_concept_id("715.3", "ICD9CM")
        self.assertIsNotNone(result)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0], expected_mapping)

        #   default vocabulary
        result = QueryCOHD.get_map_to_standard_concept_id("715.3")
        self.assertIsNotNone(result)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0], expected_mapping)

        # The two invalid-input cases below used to call
        # get_map_from_standard_concept_id, so the method under test was never
        # exercised with bad input.

        #   invalid concept_code value
        result = QueryCOHD.get_map_to_standard_concept_id("725.3")
        self.assertEqual(result, [])

        #   invalid concept_code type (float instead of str)
        result = QueryCOHD.get_map_to_standard_concept_id(725.3)
        self.assertEqual(result, [])
Beispiel #2
0
    def test_get_xref_to_OMOP(self):
        """Check cross-referencing from an external CURIE to OMOP concepts.

        Covers an explicit distance, the default distance, an unknown CURIE and
        a distance passed as a string; the last two are expected to yield an
        empty list.
        """
        expected_first = {
            'intermediate_oxo_id': 'ICD9CM:715.3', 'intermediate_oxo_label': '',
            'omop_concept_name': 'Localized osteoarthrosis uncertain if primary OR secondary',
            'omop_distance': 1, 'omop_domain_id': 'Condition',
            'omop_standard_concept_id': 72990,
            'oxo_distance': 1, 'source_oxo_id': 'DOID:8398',
            'source_oxo_label': 'osteoarthritis',
            'total_distance': 2,
        }

        #   explicit distance
        result = QueryCOHD.get_xref_to_OMOP("DOID:8398", 2)
        self.assertIsNotNone(result)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0], expected_first)

        #   default distance
        # (an identical second copy of this section was removed: it repeated
        # the same call and a subset of the same assertions)
        result = QueryCOHD.get_xref_to_OMOP("DOID:8398")
        self.assertIsNotNone(result)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0], expected_first)

        #   invalid curie id
        result = QueryCOHD.get_xref_to_OMOP("DOID:83981", 2)
        self.assertEqual(result, [])

        #   invalid distance format
        result = QueryCOHD.get_xref_to_OMOP("DOID:8398", "2")
        self.assertEqual(result, [])
Beispiel #3
0
    def test_get_map_from_standard_concept_id(self):
        """Check the lookup of source concepts mapped from a standard concept.

        Covers an explicit vocabulary, the default vocabulary and two invalid
        concept ids, for which an empty list is expected.
        """
        icd9_first = {
            "concept_class_id": "4-dig nonbill code", "concept_code": "715.3",
            "concept_id": 44834979,
            "concept_name": "Osteoarthrosis, localized, not specified whether primary or secondary",
            "domain_id": "Condition", "standard_concept": None, "vocabulary_id": "ICD9CM",
        }
        ciel_first = {
            "concept_class_id": "Diagnosis", "concept_code": "116253", "concept_id": 45930832,
            "concept_name": "Localized Osteoarthrosis Uncertain If Primary or Secondary",
            "domain_id": "Condition", "standard_concept": None, "vocabulary_id": "CIEL",
        }

        #   explicit vocabulary
        mappings = QueryCOHD.get_map_from_standard_concept_id("72990", "ICD9CM")
        self.assertIsNotNone(mappings)
        self.assertEqual(len(mappings), 10)
        self.assertEqual(mappings[0], icd9_first)

        #   default vocabulary
        mappings = QueryCOHD.get_map_from_standard_concept_id("72990")
        self.assertIsNotNone(mappings)
        self.assertEqual(len(mappings), 12)
        self.assertEqual(mappings[0], ciel_first)

        #   invalid concept_id value, then invalid concept_id type
        # NOTE(review): both calls also pass the integer 2 in the position the
        # valid calls use for the vocabulary string -- confirm this is intended
        # and not a leftover from the xref tests.
        for bad_concept_id in ("DOID:839812", 8398):
            mappings = QueryCOHD.get_map_from_standard_concept_id(bad_concept_id, 2)
            self.assertEqual(mappings, [])
Beispiel #4
0
    def test_get_domain_pair_counts(self):
        """Check domain-pair counts for explicit, default and invalid dataset ids."""
        top_pair = {
            'count': 1933917,
            'dataset_id': 1,
            'domain_id_1': 'Condition',
            'domain_id_2': 'Condition',
        }

        #   explicit dataset_id
        pair_counts = QueryCOHD.get_domain_pair_counts(1)
        self.assertIsNotNone(pair_counts)
        self.assertEqual(len(pair_counts), 50)
        self.assertEqual(pair_counts[0], top_pair)

        #   default dataset_id
        pair_counts = QueryCOHD.get_domain_pair_counts()
        self.assertIsNotNone(pair_counts)
        self.assertEqual(len(pair_counts), 50)
        self.assertEqual(pair_counts[0], top_pair)

        #   invalid dataset_id value (-1), then invalid dataset_id type ('1')
        for bad_dataset_id in (-1, '1'):
            pair_counts = QueryCOHD.get_domain_pair_counts(bad_dataset_id)
            self.assertEqual(pair_counts, [])
Beispiel #5
0
    def test_get_source_to_target_has_result(self):
        """A source-to-target query on a QueryCOHD instance must not return None."""
        cohd = QueryCOHD()
        self.assertIsNotNone(cohd.get_source_to_target(312327, 313217))
Beispiel #6
0
    def test_get_source_to_target_result(self):
        """Compare a source-to-target query against the stored JSON fixture."""
        cohd = QueryCOHD()

        # expected payload is kept in a fixture file next to the tests
        with open("./tests/NewQueryCOHDTestsData.json") as fixture:
            expected = json.load(fixture)

        actual = cohd.get_source_to_target(312327, 313217)
        self.assertListEqual(actual, expected)
Beispiel #7
0
	def get_conditions_treating(drug_description, conservative=False):
		"""
		Get all the conditions that are associated with a drug.

		The drug description is mapped to COHD concepts, the "Condition"
		associations of every matching concept are fetched, and records that
		refer to the same condition are merged by summing their counts.
		Frequencies are then recomputed as the share of each condition in the
		total of all merged counts.

		:param drug_description: string (eg. 'Naproxen')
		:param conservative: bool (True = only use concepts whose name equals the description,
			ignoring case; False = use every concept returned by COHD)
		:return: dictionary keyed by associated (condition) concept ID; each value is a dictionary like:
			{'associated_concept_id': 134736,
			'associated_concept_name': 'Backache',
			'concept_count': 112,
			'concept_frequency': 2.101665438505926e-05,
			'concept_id': 1115008}
			Fields other than the count and frequency come from the first record seen for that condition.
			The dictionary is empty when no drug concept or no association is found.
		"""

		# Get the concept IDs of the drug
		drug_concepts = QueryCOHD.find_concept_ids(drug_description)
		if conservative:
			drug_ids = [concept['concept_id'] for concept in drug_concepts
						if concept['concept_name'].lower() == drug_description.lower()]
		else:
			drug_ids = [concept['concept_id'] for concept in drug_concepts]

		# get all the associated conditions
		associated_concepts = []
		for drug_id in drug_ids:
			associated_concepts += QueryCOHD.get_associated_concept_domain_freq(str(drug_id), "Condition")

		# go through the associated conditions, summing up the concept counts per condition
		result_dict = dict()
		for associated_concept in associated_concepts:
			condition_id = associated_concept['associated_concept_id']
			if condition_id in result_dict:
				result_dict[condition_id]['concept_count'] += associated_concept['concept_count']
			else:
				# copy so the records handed back by the query are not modified in place
				result_dict[condition_id] = dict(associated_concept)

		# We'll need to adjust the frequencies in terms of the total patients treated with this drug
		total_associated_condition_counts = sum(entry['concept_count'] for entry in result_dict.values())

		for entry in result_dict.values():
			if total_associated_condition_counts:
				entry['concept_frequency'] = entry['concept_count'] / float(total_associated_condition_counts)
			else:
				# every merged count is zero: avoid dividing by zero
				entry['concept_frequency'] = 0.0

		return result_dict
Beispiel #8
0
    def test_get_source_to_target_input(self):
        """Passing either concept id as a string must yield an empty list."""
        cohd = QueryCOHD()

        # invalid parameter type: first the source id, then the target id
        for source_id, target_id in (('312327', 313217), (312327, '313217')):
            outcome = cohd.get_source_to_target(source_id, target_id)
            self.assertEqual(outcome, [])
Beispiel #9
0
    def test_get_xref_from_OMOP(self):
        """Check cross-referencing from an OMOP concept id to an external vocabulary.

        Covers an explicit distance, the default distance and three invalid
        calls, for which an empty list is expected.
        """
        expected_first = {
            "intermediate_omop_concept_code": "92546004",
            "intermediate_omop_concept_id": 192855,
            "intermediate_omop_concept_name": "Cancer in situ of urinary bladder",
            "intermediate_omop_vocabulary_id": "SNOMED",
            "intermediate_oxo_curie": "SNOMEDCT:92546004",
            "intermediate_oxo_label": "Cancer in situ of urinary bladder",
            "omop_distance": 0,
            "oxo_distance": 1,
            "source_omop_concept_code": "92546004",
            "source_omop_concept_id": 192855,
            "source_omop_concept_name": "Cancer in situ of urinary bladder",
            "source_omop_vocabulary_id": "SNOMED",
            "target_curie": "UMLS:C0154091",
            "target_label": "Cancer in situ of urinary bladder",
            "total_distance": 1,
        }

        #   explicit distance
        xrefs = QueryCOHD.get_xref_from_OMOP("192855", "UMLS", 2)
        self.assertIsNotNone(xrefs)
        self.assertEqual(len(xrefs), 6)
        self.assertEqual(xrefs[0], expected_first)

        #   default distance
        xrefs = QueryCOHD.get_xref_from_OMOP("192855", "UMLS")
        self.assertIsNotNone(xrefs)
        self.assertEqual(len(xrefs), 6)
        self.assertEqual(xrefs[0], expected_first)

        # NOTE(review): all three invalid cases use the invalid concept id
        # "1928551", so the mapping_targets and distance cases do not isolate
        # the argument they are named after -- confirm whether that is intended.
        invalid_calls = (
            ("1928551", "UMLS", 2),    # invalid concept id
            ("1928551", "UMS", 2),     # invalid mapping_targets
            ("1928551", "UMLS", "2"),  # invalid distance format
        )
        for concept_id, mapping_targets, distance in invalid_calls:
            xrefs = QueryCOHD.get_xref_from_OMOP(concept_id, mapping_targets, distance)
            self.assertEqual(xrefs, [])
Beispiel #10
0
    def test_find_concept_ids(self):
        """Check concept search by name, with and without a domain filter.

        Also covers an unknown name, an unknown domain, and a large "ibuprofen"
        query with explicit dataset_id and min_count.
        """
        # Disabled case, kept for reference:
        # result = QueryCOHD.find_concept_ids("cancer", "Condition", dataset_id=1, min_count=0)
        # self.assertIsNotNone(result)
        # self.assertEqual(len(result), 84)
        # self.assertEqual(result[0], {'concept_class_id': 'Clinical Finding',
        #                              'concept_code': '92546004',
        #                              'concept_count': 368.0,
        #                              'concept_id': 192855,
        #                              'concept_name': 'Cancer in situ of urinary bladder', 'domain_id': 'Condition',
        #                              'vocabulary_id': 'SNOMED'})

        bladder_cancer = {
            'concept_class_id': 'Clinical Finding',
            'concept_code': '92546004',
            'concept_count': 368.0,
            'concept_id': 192855,
            'concept_name': 'Cancer in situ of urinary bladder',
            'domain_id': 'Condition',
            'vocabulary_id': 'SNOMED',
        }
        cancer_screening = {
            'concept_class_id': 'Procedure',
            'concept_code': '15886004',
            'concept_count': 4195.0,
            'concept_id': 4048727,
            'concept_name': 'Screening for cancer',
            'domain_id': 'Procedure',
            'vocabulary_id': 'SNOMED',
        }
        ibuprofen_tablet = {
            'concept_class_id': 'Clinical Drug',
            'concept_code': '197806',
            'concept_count': 115101,
            'concept_id': 19019073,
            'concept_name': 'Ibuprofen 600 MG Oral Tablet',
            'domain_id': 'Drug',
            'vocabulary_id': 'RxNorm',
        }

        #   default dataset_id and min_count
        found = QueryCOHD.find_concept_ids("cancer", "Condition")
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 3)
        self.assertEqual(found[0], bladder_cancer)

        #   default dataset_id and domain
        found = QueryCOHD.find_concept_ids("cancer")
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 37)
        self.assertEqual(found[0], cancer_screening)

        #   invalid name value, then invalid domain value
        for name, domain in (("cancer1", "Condition"), ("cancer", "Conditi")):
            found = QueryCOHD.find_concept_ids(name, domain)
            self.assertEqual(found, [])

        #   timeout case (backend timeout issue has been fixed)
        found = QueryCOHD.find_concept_ids("ibuprofen", "Drug", dataset_id=1, min_count=0)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 1000)
        self.assertEqual(found[0], ibuprofen_tablet)
Beispiel #11
0
    def test_get_individual_concept_freq(self):
        """Check the frequency record of one concept and the None result for bad ids."""
        freq = QueryCOHD.get_individual_concept_freq("2008271")
        self.assertIsNotNone(freq)
        self.assertEqual(freq['concept_frequency'], 0.0003831786451275983)
        self.assertEqual(freq['concept_count'], 2042)

        # wrong ID ("0"), then wrong parameter format (int instead of str)
        for bad_concept_id in ("0", 2008271):
            freq = QueryCOHD.get_individual_concept_freq(bad_concept_id)
            self.assertIsNone(freq)
Beispiel #12
0
    def test_get_paired_concept_freq(self):
        """Check the frequency record of a concept pair and the None result for bad ids."""
        pair_freq = QueryCOHD.get_paired_concept_freq("2008271", "192855")
        self.assertIsNotNone(pair_freq)
        self.assertEqual(pair_freq['concept_frequency'], 0.000005066514896398214)
        self.assertEqual(pair_freq['concept_count'], 27)

        # wrong IDs (same concept twice), then wrong parameter format (ints)
        for first_id, second_id in (("2008271", "2008271"), (2008271, 192855)):
            pair_freq = QueryCOHD.get_paired_concept_freq(first_id, second_id)
            self.assertIsNone(pair_freq)
Beispiel #13
0
    def test_find_concept_ids(self):
        """Check concept search by label, including a label with no matches."""
        expected_matches = (
            ('192855', 'Cancer in situ of urinary bladder'),
            ('2008271', 'Injection or infusion of cancer chemotherapeutic substance'),
        )

        matches = QueryCOHD.find_concept_ids("cancer")
        self.assertIsNotNone(matches)
        self.assertEqual(len(matches), 2)
        for position, (concept_id, concept_name) in enumerate(expected_matches):
            self.assertEqual(matches[position]['concept_id'], concept_id)
            self.assertEqual(matches[position]['concept_name'], concept_name)

        # wrong label
        matches = QueryCOHD.find_concept_ids("cancers")
        self.assertIsNotNone(matches)
        self.assertEqual(len(matches), 0)
Beispiel #14
0
    def test_get_patient_count(self):
        """Check patient counts for explicit, default and invalid dataset ids."""
        valid_cases = (
            ((2,), {'count': 5364781.0, 'dataset_id': 2}),  # explicit dataset_id
            ((), {'count': 1790431.0, 'dataset_id': 1}),    # default dataset_id
        )
        for call_args, expected in valid_cases:
            patient_count = QueryCOHD.get_patient_count(*call_args)
            self.assertIsNotNone(patient_count)
            self.assertEqual(patient_count, expected)

        #   invalid dataset_id value (-1), then invalid dataset_id type ('1')
        for bad_dataset_id in (-1, '1'):
            patient_count = QueryCOHD.get_patient_count(bad_dataset_id)
            self.assertEqual(patient_count, {})
Beispiel #15
0
    def test_get_paired_concept_freq(self):
        """Check paired-concept frequency for explicit/default dataset and bad ids."""
        valid_calls = (
            ("2008271", "192855", 1),  # explicit dataset_id
            ("2008271", "192855"),     # default dataset_id
        )
        for call_args in valid_calls:
            pair_freq = QueryCOHD.get_paired_concept_freq(*call_args)
            self.assertIsNotNone(pair_freq)
            self.assertEqual(pair_freq['concept_frequency'], 0.000005585247351056813)
            self.assertEqual(pair_freq['concept_count'], 10)

        #   invalid ID value (same concept twice), then invalid parameter type (ints)
        for first_id, second_id in (("2008271", "2008271"), (2008271, 192855)):
            pair_freq = QueryCOHD.get_paired_concept_freq(first_id, second_id)
            self.assertEqual(pair_freq, {})
Beispiel #16
0
    def test_get_individual_concept_freq(self):
        """Check single-concept frequency for explicit/default dataset and bad ids."""
        valid_calls = (
            ("192855", 1),  # explicit dataset id
            ("192855",),    # default dataset id
        )
        for call_args in valid_calls:
            freq = QueryCOHD.get_individual_concept_freq(*call_args)
            self.assertIsNotNone(freq)
            self.assertEqual(freq['concept_frequency'], 0.0002055371025188907)
            self.assertEqual(freq['concept_count'], 368)

        #   invalid ID value ("0"), then invalid concept_id type (int)
        for bad_concept_id in ("0", 2008271):
            freq = QueryCOHD.get_individual_concept_freq(bad_concept_id, 1)
            self.assertEqual(freq, {})
Beispiel #17
0
    def test_get_concepts(self):
        """Check concept lookup for two ids, one id, and a list with a non-string id."""
        bladder_cancer = {
            'concept_class_id': 'Clinical Finding', 'concept_code': '92546004',
            'concept_id': 192855, 'concept_name': 'Cancer in situ of urinary bladder',
            'domain_id': 'Condition', 'vocabulary_id': 'SNOMED',
        }
        chemo_injection = {
            'concept_class_id': '4-dig billing code', 'concept_code': '99.25',
            'concept_id': 2008271,
            'concept_name': 'Injection or infusion of cancer '
                            'chemotherapeutic substance',
            'domain_id': 'Procedure', 'vocabulary_id': 'ICD9Proc',
        }

        #   two concept ids
        concepts = QueryCOHD.get_concepts(["192855", "2008271"])
        self.assertIsNotNone(concepts)
        self.assertEqual(len(concepts), 2)
        self.assertEqual(concepts, [bladder_cancer, chemo_injection])

        #   single concept id
        concepts = QueryCOHD.get_concepts(["192855"])
        self.assertIsNotNone(concepts)
        self.assertEqual(len(concepts), 1)
        self.assertEqual(concepts, [bladder_cancer])

        #   invalid concept_id type
        concepts = QueryCOHD.get_concepts(["192855", 2008271])
        self.assertEqual(concepts, [])
Beispiel #18
0
    def test_get_domain_counts(self):
        """Check per-domain counts for explicit, default and invalid dataset ids."""
        first_domain = {
            'count': 10159,
            'dataset_id': 1,
            'domain_id': 'Condition',
        }

        #   explicit dataset_id
        domain_counts = QueryCOHD.get_domain_counts(1)
        self.assertIsNotNone(domain_counts)
        self.assertEqual(len(domain_counts), 10)
        self.assertEqual(domain_counts[0], first_domain)

        #   default dataset_id
        domain_counts = QueryCOHD.get_domain_counts()
        self.assertIsNotNone(domain_counts)
        self.assertEqual(len(domain_counts), 10)
        self.assertEqual(domain_counts[0], first_domain)

        #   invalid dataset_id value (-1), then invalid dataset_id type ("1")
        for bad_dataset_id in (-1, "1"):
            domain_counts = QueryCOHD.get_domain_counts(bad_dataset_id)
            self.assertEqual(domain_counts, [])
Beispiel #19
0
    def test_get_associated_concept_domain_freq(self):
        """Check domain-filtered associated-concept frequencies and invalid inputs."""
        valid_cases = (
            # (call arguments, frequency of first record, number of records)
            (('192855', 'Procedure', 2), 0.0002508956097182718, 655),  # explicit dataset_id
            (('192855', 'Procedure'), 0.00016867447000191573, 159),    # default dataset_id
        )
        for call_args, first_frequency, record_count in valid_cases:
            associations = QueryCOHD.get_associated_concept_domain_freq(*call_args)
            self.assertIsNotNone(associations)
            self.assertEqual(associations[0]['concept_frequency'], first_frequency)
            self.assertEqual(len(associations), record_count)

        invalid_calls = (
            ("0", "drug"),      # invalid concept ID value
            ("192855", "dru"),  # invalid domain value
            (192855, "drug"),   # invalid concept type
        )
        for concept_id, domain in invalid_calls:
            associations = QueryCOHD.get_associated_concept_domain_freq(concept_id, domain)
            self.assertEqual(associations, [])
Beispiel #20
0
 def test_get_datasets(self):
     """Check that the dataset listing matches the three expected records."""
     expected_datasets = [
         {'dataset_description': "Clinical data from 2013-2017. Each concept's count reflects "
                                 "the use of that specific concept.",
          'dataset_id': 1,
          'dataset_name': "5-year non-hierarchical"},
         {'dataset_description': "Clinical data from all years in the database. Each concept's"
                                 " count reflects the use of that specific concept.",
          'dataset_id': 2,
          'dataset_name': "Lifetime non-hierarchical"},
         {"dataset_description": "Clinical data from 2013-2017. Each concept's count includes"
                                 " use of that concept and descendant concepts.",
          "dataset_id": 3,
          "dataset_name": "5-year hierarchical"},
     ]

     datasets = QueryCOHD.get_datasets()
     self.assertIsNotNone(datasets)
     self.assertEqual(len(datasets), 3)
     self.assertEqual(datasets, expected_datasets)
Beispiel #21
0
    def test_get_associated_concept_freq(self):
        """Check associated-concept frequencies for explicit/default dataset and invalid inputs."""
        first_in_dataset_2 = {
            'associated_concept_id': 197508,
            'associated_concept_name': 'Malignant tumor of urinary bladder',
            'associated_domain_id': 'Condition',
            'concept_count': 1477,
            'concept_frequency': 0.0002753141274545969,
            'concept_id': 192855,
            'dataset_id': 2,
        }
        first_in_dataset_1 = {
            'associated_concept_id': 2213216,
            'associated_concept_name': 'Cytopathology, selective cellular enhancement technique with interpretation (eg, liquid based slide preparation method), except cervical or vaginal',
            'associated_domain_id': 'Measurement',
            'concept_count': 330,
            'concept_frequency': 0.0001843131625848748,
            'concept_id': 192855,
            'dataset_id': 1,
        }

        #   explicit dataset_id
        associations = QueryCOHD.get_associated_concept_freq("192855", 2)
        self.assertIsNotNone(associations)
        self.assertEqual(len(associations), 2735)
        self.assertEqual(associations[0], first_in_dataset_2)

        #   default dataset_id
        associations = QueryCOHD.get_associated_concept_freq("192855")
        self.assertIsNotNone(associations)
        self.assertEqual(len(associations), 768)
        self.assertEqual(associations[0], first_in_dataset_1)

        invalid_calls = (
            ("1928551",),     # invalid concept_id value
            ("192855", 10),   # invalid dataset_id value
            (192855,),        # invalid concept format
            ("192855", "1"),  # invalid dataset_id format
        )
        for call_args in invalid_calls:
            associations = QueryCOHD.get_associated_concept_freq(*call_args)
            self.assertEqual(associations, [])
Beispiel #22
0
    def test_get_obs_exp_ratio(self):
        """Check observed/expected ratios for a concept pair and for a single concept.

        Pair queries are compared against full expected records, single-concept
        queries against the number of records returned, and calls with wrongly
        typed arguments against an empty list.
        """
        pair_in_dataset_1 = [{'concept_id_1': 192855,
                              'concept_id_2': 2008271,
                              'dataset_id': 1,
                              'expected_count': 0.3070724311632227,
                              'ln_ratio': 3.483256720088832,
                              'observed_count': 10}]
        pair_in_dataset_2 = [{'concept_id_1': 192855,
                              'concept_id_2': 2008271,
                              'dataset_id': 2,
                              'expected_count': 5.171830872499735,
                              'ln_ratio': 3.634887899455015,
                              'observed_count': 196}]

        pair_cases = (
            (("192855", "2008271", "Procedure"), pair_in_dataset_1),     # default dataset_id
            (("192855", "2008271", "Procedure", 2), pair_in_dataset_2),  # dataset_id == 2
            (("192855", "2008271"), pair_in_dataset_1),                  # default domain
            (("192855", "2008271", "", 2), pair_in_dataset_2),           # default domain, dataset_id == 2
        )
        for call_args, expected in pair_cases:
            ratios = QueryCOHD.get_obs_exp_ratio(*call_args)
            self.assertIsNotNone(ratios)
            self.assertEqual(len(ratios), 1)
            self.assertEqual(ratios, expected)

        count_cases = (
            (("192855",), 768),                     # default concept_id_2, domain and dataset_id
            (("192855", "", "", 2), 2735),          # default concept_id_2 and domain, dataset_id == 2
            (("192855", "", "Procedure"), 159),     # default concept_id_2 and dataset_id
            (("192855", "", "Procedure", 2), 655),  # default concept_id_2
        )
        for call_args, record_count in count_cases:
            ratios = QueryCOHD.get_obs_exp_ratio(*call_args)
            self.assertIsNotNone(ratios)
            self.assertEqual(len(ratios), record_count)

        invalid_calls = (
            (192855, "2008271", "", 2),      # invalid concept_id_1 type
            ("192855", 2008271, "", 2),      # invalid concept_id_2 type
            ("192855", "2008271", "", "2"),  # invalid dataset_id type
        )
        for call_args in invalid_calls:
            ratios = QueryCOHD.get_obs_exp_ratio(*call_args)
            self.assertEqual(ratios, [])
Beispiel #23
0
 def test_get_vocabularies(self):
     """Check the number of vocabularies and the ids of the first two."""
     vocabularies = QueryCOHD.get_vocabularies()
     self.assertIsNotNone(vocabularies)
     self.assertEqual(len(vocabularies), 73)
     for position, vocabulary_id in enumerate(('ABMS', 'AMT')):
         self.assertEqual(vocabularies[position]['vocabulary_id'], vocabulary_id)
Beispiel #24
0
    def test_get_chi_square(self):
        """Check chi-square results for a concept pair and for a single concept.

        Pair queries are compared against full expected records, single-concept
        queries against the number of records returned, and invalid calls
        against an empty list.
        """
        pair_in_dataset_1 = [{'chi_square': 306.2816108187519,
                              'concept_id_1': 192855,
                              'concept_id_2': 2008271,
                              'dataset_id': 1,
                              'p-value': 1.4101531778039801e-68}]
        pair_in_dataset_2 = [{'chi_square': 7065.7865572100745,
                              'concept_id_1': 192855,
                              'concept_id_2': 2008271,
                              'dataset_id': 2,
                              'p-value': 0.0}]

        pair_cases = (
            (("192855", "2008271", "Condition"), pair_in_dataset_1),     # default dataset_id
            (("192855", "2008271", "Condition", 2), pair_in_dataset_2),  # dataset_id == 2
            (("192855", "2008271"), pair_in_dataset_1),                  # default domain and dataset_id
        )
        for call_args, expected in pair_cases:
            chi_squares = QueryCOHD.get_chi_square(*call_args)
            self.assertIsNotNone(chi_squares)
            self.assertEqual(len(chi_squares), 1)
            self.assertEqual(chi_squares, expected)

        count_cases = (
            (("192855",), 768),                     # no concept_id_2, default domain and dataset_id
            (("192855", "", "Condition"), 226),     # no concept_id_2, default dataset_id
            (("192855", "", "Condition", 2), 991),  # no concept_id_2, dataset_id == 2
            (("192855", "", "", 2), 2735),          # no concept_id_2, dataset_id == 2, default domain
        )
        for call_args, record_count in count_cases:
            chi_squares = QueryCOHD.get_chi_square(*call_args)
            self.assertIsNotNone(chi_squares)
            self.assertEqual(len(chi_squares), record_count)

        invalid_calls = (
            (192855, "", "", 1),                     # invalid concept_id_1 type
            ("192855", 2008271, "", 1),              # invalid concept_id_2 type
            ("192855", "2008271", "condition", 10),  # invalid dataset_id value
        )
        for call_args in invalid_calls:
            chi_squares = QueryCOHD.get_chi_square(*call_args)
            self.assertEqual(chi_squares, [])
    def make_edge_attribute_from_curies(self,
                                        source_curie,
                                        target_curie,
                                        source_name="",
                                        target_name="",
                                        default=0.,
                                        name=""):
        """
        Generic function to make an edge attribute by querying a knowledge provider (KP).

        The KP is picked from self.who_knows_about_what: a KP qualifies when
        self.in_common() reports that it knows both the source and the target
        node type; if several qualify, the last one iterated wins. Only the
        'COHD' KP is implemented here.

        :param source_curie: CURIE of the source node for the edge under consideration
        :param target_curie: CURIE of the target node for the edge under consideration
        :param source_name: text name of the source node (in case the KP doesn't understand the CURIE)
        :param target_name: text name of the target node (in case the KP doesn't understand the CURIE)
        :param default: default value of the edge attribute
        :param name: name of the KP functionality you want to apply. Handled explicitly:
            'paired_concept_frequency' (frequencies summed over all OMOP pairs, starting from ``default``),
            'observed_expected_ratio' (largest ln_ratio over all OMOP pairs, -inf if none found),
            'chi_square' (smallest p-value over all OMOP pairs, inf if none found).
            Any other name leaves the value at ``default``.
        :return: an EdgeAttribute (value stored as a string) when COHD is the selected KP;
            None when no/another KP is selected, or when an exception occurred
            (the exception is reported through self.response.error instead of being raised)
        """
        # Bound before the try block so the error handler can always mention it,
        # even when the failure happens before a KP has been selected.
        KP_to_use = None
        try:
            # edge attributes
            attribute_type = "data:0951"
            url = "http://cohd.smart-api.info/"
            value = default

            node_curie_to_type = self.node_curie_to_type
            source_type = node_curie_to_type[source_curie]
            target_type = node_curie_to_type[target_curie]
            # figure out which knowledge provider to use  # TODO: should handle this in a more structured fashion, does there exist a standardized KP API format?
            for KP in self.who_knows_about_what:
                # see which KP's can label both sources of information
                known_types = self.who_knows_about_what[KP]
                if self.in_common(source_type, known_types) and self.in_common(target_type, known_types):
                    KP_to_use = KP
            if KP_to_use != 'COHD':
                # no handler implemented for any other knowledge provider
                return None

            # convert CURIE to OMOP identifiers
            source_OMOPs = [
                str(x['omop_standard_concept_id'])
                for x in COHD.get_xref_to_OMOP(source_curie, 1)
            ]
            target_OMOPs = [
                str(x['omop_standard_concept_id'])
                for x in COHD.get_xref_to_OMOP(target_curie, 1)
            ]
            # FIXME: Super hacky way to get around the fact that COHD can't map CHEMBL drugs
            if source_curie.split('.')[0] == 'CHEMBL':
                source_OMOPs = [
                    str(x['concept_id']) for x in COHD.find_concept_ids(
                        source_name, domain="Drug", dataset_id=3)
                ]
            if target_curie.split('.')[0] == 'CHEMBL':
                target_OMOPs = [
                    str(x['concept_id']) for x in COHD.find_concept_ids(
                        target_name, domain="Drug", dataset_id=3)
                ]
            # uniquify everything
            source_OMOPs = list(set(source_OMOPs))
            target_OMOPs = list(set(target_OMOPs))

            # Decide how to handle the response from the KP
            if name == 'paired_concept_frequency':
                # sum up all frequencies  #TODO check with COHD people to see if this is kosher
                frequency = default
                for omop1, omop2 in itertools.product(source_OMOPs, target_OMOPs):
                    freq_data = COHD.get_paired_concept_freq(
                        omop1, omop2, 3)  # use the hierarchical dataset
                    if freq_data and 'concept_frequency' in freq_data:
                        frequency += freq_data['concept_frequency']
                # decorate the edges
                value = frequency
            elif name == 'observed_expected_ratio':
                # should probably take the largest obs/exp ratio  # TODO: check with COHD people to see if this is kosher
                # FIXME: the ln_ratio can be negative, so I should probably account for this, but the object model doesn't like -np.inf
                # FIXME: unclear in object model if attribute type dictates value type, or if value always needs to be a string
                value = float("-inf")
                # NOTE: an earlier experiment fetched every ratio for one concept in a single
                # call (omitting concept_id_2) and looked the pairs up locally; it was left
                # out because the speed-up was difficult to quantify given the caching.
                for omop1, omop2 in itertools.product(source_OMOPs, target_OMOPs):
                    response = COHD.get_obs_exp_ratio(
                        omop1, concept_id_2=omop2, domain="",
                        dataset_id=3)  # use the hierarchical dataset
                    # response is a list, since this function is overloaded and can omit concept_id_2, take the first element
                    if response and 'ln_ratio' in response[0]:
                        temp_val = response[0]['ln_ratio']
                        if temp_val > value:
                            value = temp_val
            elif name == 'chi_square':
                value = float("inf")
                for omop1, omop2 in itertools.product(source_OMOPs, target_OMOPs):
                    response = COHD.get_chi_square(
                        omop1, concept_id_2=omop2, domain="",
                        dataset_id=3)  # use the hierarchical dataset
                    # response is a list, since this function is overloaded and can omit concept_id_2, take the first element
                    if response and 'p-value' in response[0]:
                        temp_val = response[0]['p-value']
                        if temp_val < value:  # looking at p=values, so lower is better
                            value = temp_val

            # create and populate the edge attribute
            # FIXME: unclear in object model if attribute type dictates value type, or if value always needs to be a string
            return EdgeAttribute(type=attribute_type, name=name, value=str(value), url=url)
        except Exception as error:
            # Report instead of raising: first the full traceback tagged with the
            # exception class name, then a short human-readable summary.
            tb = traceback.format_exc()
            self.response.error(tb, error_code=type(error).__name__)
            self.response.error(
                f"Something went wrong when adding the edge attribute from {KP_to_use}."
            )
            return None
Beispiel #26
0
    def test_get_relative_frequency(self):
        # Covers QueryCOHD.get_relative_frequency for concept 192855 (optionally
        # paired with 2008271): exact records for a fully specified pair, result
        # counts when concept_id_2 is left empty, and empty results for badly
        # typed arguments.
        record_dataset_1 = {'concept_2_count': 1494,
                            'concept_id_1': 192855,
                            'concept_id_2': 2008271,
                            'concept_pair_count': 10,
                            'dataset_id': 1,
                            'relative_frequency': 0.006693440428380187}
        record_dataset_2 = {'concept_2_count': 17127,
                            'concept_id_1': 192855,
                            'concept_id_2': 2008271,
                            'concept_pair_count': 196,
                            'dataset_id': 2,
                            'relative_frequency': 0.011443918958369825}

        # (positional arguments, the single record expected back)
        single_record_cases = [
            # default dataset_id
            (("192855", "2008271", "Procedure"), record_dataset_1),
            # dataset_id == 2
            (("192855", "2008271", "Procedure", 2), record_dataset_2),
            # default domain
            (("192855", "2008271"), record_dataset_1),
            # default domain, dataset_id == 2
            (("192855", "2008271", "", 2), record_dataset_2),
        ]
        for call_args, expected_record in single_record_cases:
            rows = QueryCOHD.get_relative_frequency(*call_args)
            self.assertIsNotNone(rows)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows, [expected_record])

        # (positional arguments, number of records expected back)
        count_cases = [
            # default concept_id_2, domain and dataset_id
            (("192855",), 768),
            # default concept_id_2 and domain, dataset_id == 2
            (("192855", "", "", 2), 2735),
            # default concept_id_2 and dataset_id
            (("192855", "", "Procedure"), 159),
            # default concept_id_2
            (("192855", "", "Procedure", 2), 655),
        ]
        for call_args, expected_count in count_cases:
            rows = QueryCOHD.get_relative_frequency(*call_args)
            self.assertIsNotNone(rows)
            self.assertEqual(len(rows), expected_count)

        # Calls that are expected to come back as an empty list
        empty_result_calls = [
            # invalid concept_id_1 type
            (192855, "2008271", "", 2),
            # invalid concept_id_2 type
            ("192855", 2008271, "", 2),
            # invalid dataset_id type
            ("192855", "2008271", "", "2"),
        ]
        for call_args in empty_result_calls:
            self.assertEqual(QueryCOHD.get_relative_frequency(*call_args), [])
Beispiel #27
0
    def test_get_most_frequent_concepts(self):
        # Covers QueryCOHD.get_most_frequent_concepts: the top entry for three
        # domain/dataset combinations when 10 concepts are requested, and empty
        # results for invalid num, domain and dataset_id arguments.
        top_overall = {'concept_class_id': 'Undefined',
                       'concept_count': 1189172,
                       'concept_frequency': 0.6641819762950932,
                       'concept_id': 44814653,
                       'concept_name': 'Unknown',
                       'dataset_id': 1,
                       'domain_id': 'Observation',
                       'vocabulary_id': 'PCORNet'}
        top_condition_dataset_1 = {'concept_class_id': 'Clinical Finding',
                                   'concept_count': 233790,
                                   'concept_frequency': 0.1305774978203572,
                                   'concept_id': 320128,
                                   'concept_name': 'Essential hypertension',
                                   'dataset_id': 1,
                                   'domain_id': 'Condition',
                                   'vocabulary_id': 'SNOMED'}
        top_condition_dataset_2 = {'concept_class_id': 'Clinical Finding',
                                   'concept_count': 459776,
                                   'concept_frequency': 0.08570265962394365,
                                   'concept_id': 320128,
                                   'concept_name': 'Essential hypertension',
                                   'dataset_id': 2,
                                   'domain_id': 'Condition',
                                   'vocabulary_id': 'SNOMED'}

        # (positional arguments, first record expected in the 10-item result)
        top_entry_cases = [
            # default domain and dataset_id
            ((10,), top_overall),
            # default dataset_id
            ((10, "Condition"), top_condition_dataset_1),
            # no default value
            ((10, "Condition", 2), top_condition_dataset_2),
        ]
        for call_args, expected_first in top_entry_cases:
            concepts = QueryCOHD.get_most_frequent_concepts(*call_args)
            self.assertIsNotNone(concepts)
            self.assertEqual(len(concepts), 10)
            self.assertEqual(concepts[0], expected_first)

        # Calls that are expected to come back as an empty list
        empty_result_calls = [
            # invalid num value
            (-10,),
            # invalid num type
            ("10",),
            # invalid domain value
            (10, "Condition1"),
            # invalid domain type
            (10, 1, 2),
            # invalid dataset_id value
            (10, "Condition", 10),
            # invalid dataset_id type
            (10, "Condition", "2"),
        ]
        for call_args in empty_result_calls:
            self.assertEqual(QueryCOHD.get_most_frequent_concepts(*call_args), [])
Beispiel #28
0
    def test_get_concept_descendants(self):
        # Covers QueryCOHD.get_concept_descendants for concept 19019073 while
        # progressively dropping the trailing optional arguments; each case
        # checks the number of descendants and the first record returned.
        branded_tablet = {
            "concept_class_id": "Branded Drug",
            "concept_code": "206913",
            "concept_count": 14744,
            "concept_name": "Ibuprofen 600 MG Oral Tablet [Ibu]",
            "descendant_concept_id": 19033921,
            "domain_id": "Drug",
            "max_levels_of_separation": 0,
            "min_levels_of_separation": 0,
            "standard_concept": "S",
            "vocabulary_id": "RxNorm",
        }
        clinical_tablet = {
            "concept_class_id": "Clinical Drug",
            "concept_code": "197806",
            "concept_count": 121104,
            "concept_name": "Ibuprofen 600 MG Oral Tablet",
            "descendant_concept_id": 19019073,
            "domain_id": "Drug",
            "max_levels_of_separation": 0,
            "min_levels_of_separation": 0,
            "standard_concept": "S",
            "vocabulary_id": "RxNorm",
        }

        # (positional arguments, expected number of records, expected first record)
        cases = [
            # every argument given
            (('19019073', 'RxNorm', 'Branded Drug', 1), 3, branded_tablet),
            # default dataset_id: same record apart from its concept_count
            (('19019073', 'RxNorm', 'Branded Drug'), 3,
             dict(branded_tablet, concept_count=14853)),
            # default concept_class_id
            (('19019073', 'RxNorm'), 4, clinical_tablet),
            # default vocabulary_id
            (('19019073',), 4, clinical_tablet),
        ]
        for call_args, expected_count, expected_first in cases:
            descendants = QueryCOHD.get_concept_descendants(*call_args)
            self.assertIsNotNone(descendants)
            self.assertEqual(len(descendants), expected_count)
            self.assertEqual(descendants[0], expected_first)
Beispiel #29
0
    def test_get_concept_ancestors(self):
        # Covers QueryCOHD.get_concept_ancestors for concept 19019073 while
        # progressively dropping the trailing optional arguments; each case
        # checks the number of ancestors and the first record returned.
        ibuprofen_ingredient = {'ancestor_concept_id': 1177480,
                                'concept_class_id': 'Ingredient',
                                'concept_code': '5640',
                                'concept_count': 174,
                                'concept_name': 'Ibuprofen',
                                'domain_id': 'Drug',
                                'max_levels_of_separation': 2,
                                'min_levels_of_separation': 2,
                                'standard_concept': 'S',
                                'vocabulary_id': 'RxNorm'}
        clinical_tablet = {
            "ancestor_concept_id": 19019073,
            "concept_class_id": "Clinical Drug",
            "concept_code": "197806",
            "concept_count": 121104,
            "concept_name": "Ibuprofen 600 MG Oral Tablet",
            "domain_id": "Drug",
            "max_levels_of_separation": 0,
            "min_levels_of_separation": 0,
            "standard_concept": "S",
            "vocabulary_id": "RxNorm",
        }

        # (positional arguments, expected number of records, expected first record)
        cases = [
            # every argument given
            (('19019073', 'RxNorm', 'Ingredient', 1), 1, ibuprofen_ingredient),
            # default dataset_id: same record apart from its concept_count
            (('19019073', 'RxNorm', 'Ingredient'), 1,
             dict(ibuprofen_ingredient, concept_count=233514)),
            # default concept_class_id
            (('19019073', 'RxNorm'), 4, clinical_tablet),
            # default vocabulary_id
            (('19019073',), 8, clinical_tablet),
        ]
        for call_args, expected_count, expected_first in cases:
            ancestors = QueryCOHD.get_concept_ancestors(*call_args)
            self.assertIsNotNone(ancestors)
            self.assertEqual(len(ancestors), expected_count)
            self.assertEqual(ancestors[0], expected_first)