def test_populate_container(self):
    '''Check that Criteria.populate_container fills an empty container and then extends it.

    The first call starts from an empty dict; the second call is handed the
    container returned by the first and adds a second disease for the same feature.
    '''
    gene_id = 'ENSG00000110800'

    # First pass: empty container, one study tagged with T1D.
    first_pass = Criteria.populate_container(
        'GDXHsS00004', 'Barrett',
        {'linkdata': 'somedata', 'linkvalue': 'somevalue'},
        [gene_id], ['T1D'], result_container={})
    expected_first_pass = {
        gene_id: {
            'T1D': [{'fid': 'GDXHsS00004', 'fname': 'Barrett',
                     'fnotes': {'linkvalue': 'somevalue', 'linkdata': 'somedata'}}],
        },
    }
    self.assertEqual(expected_first_pass, first_pass, 'Expected result from populate container')

    # Second pass: reuse the returned container and tag another study with RA.
    second_pass = Criteria.populate_container(
        'GDXHsS00003', 'Barrett',
        {'linkdata': 'somedata2', 'linkvalue': 'somevalue2'},
        [gene_id], ['RA'], result_container=first_pass)
    expected_second_pass = {
        gene_id: {
            'RA': [{'fname': 'Barrett', 'fid': 'GDXHsS00003',
                    'fnotes': {'linkvalue': 'somevalue2', 'linkdata': 'somedata2'}}],
            'T1D': [{'fname': 'Barrett', 'fid': 'GDXHsS00004',
                     'fnotes': {'linkvalue': 'somevalue', 'linkdata': 'somedata'}}],
        },
    }
    self.assertEqual(second_pass, expected_second_pass, 'Expected result after populating')
def test_get_criteria_dict(self):
    '''Check Criteria.get_criteria_dict with and without the optional notes argument.'''
    study_id, study_name = 'GDXHsS00004', 'Barrett'

    # Without notes only 'fid' and 'fname' are expected.
    without_notes = Criteria.get_criteria_dict(study_id, study_name)
    self.assertEqual({'fid': 'GDXHsS00004', 'fname': 'Barrett'}, without_notes, 'dicts are equal')

    # With notes the extra dict is expected under 'fnotes'.
    with_notes = Criteria.get_criteria_dict(study_id, study_name, {'rsq': '0.1'})
    self.assertEqual({'fid': 'GDXHsS00004', 'fnotes': {'rsq': '0.1'}, 'fname': 'Barrett'},
                     with_notes, 'dicts are equal')
def test_get_elastic_query(self):
    '''Check the kind of query Criteria.get_elastic_query builds for two ini sections.

    The query kind is detected by looking for its name in the string form of
    the query object's attribute dict.
    '''
    config = IniParser().read_ini(MY_INI_FILE)

    # (ini section, text expected somewhere in the query's attributes)
    expectations = (
        ("is_gene_in_mhc", 'range'),
        ("cand_gene_in_study", 'match_all'),
    )
    for section, query_kind in expectations:
        query = Criteria.get_elastic_query(section, config)
        query_attributes = query.__dict__
        self.assertTrue(query_kind in str(query_attributes))
def test_available_criterias(self):
    '''Check Criteria.get_available_criterias for one feature and for all features.'''
    expected_for_gene = {'gene': ['cand_gene_in_study', 'gene_in_region',
                                  'is_gene_in_mhc', 'cand_gene_in_region']}

    # Asking for a single feature: the result is keyed by that feature only.
    gene_only = Criteria.get_available_criterias('gene', INI_CONFIG)
    self.assertIsNotNone(gene_only, 'Criterias as not none')
    self.assertIn('cand_gene_in_study', gene_only['gene'])
    self.assertEqual(gene_only.keys(), expected_for_gene.keys(), 'Dic keys equal')

    # No feature given: both gene and marker entries are expected.
    all_features = Criteria.get_available_criterias(feature=None, config=INI_CONFIG)
    for feature_name in ('gene', 'marker'):
        self.assertIn(feature_name, all_features)
def show_feature_criteria_details(feature_id, feature_type, feature_doc=None,
                                  section='criteria', section_title="criteria"):
    ''' Template inclusion tag to render criteria details bar.

    Looks up the criteria index and index types for C{feature_type}, fetches the
    criteria disease tags for C{feature_id} and returns the template context.

    @param feature_id: identifier of the feature to look up.
    @param feature_type: feature type name handed to
        Criteria.get_feature_idx_n_idxtypes; also exposed as 'appname'.
    @param feature_doc: optional feature document, exposed to the template as 'f'.
    @param section: value exposed to the template as 'section'.
    @param section_title: value exposed to the template as 'section_title'.
    @return: dict with the keys 'criteria', 'feature_id', 'appname', 'f',
        'section' and 'section_title'.
    '''
    # The debug print() calls that used to live here were removed: a template
    # tag should not write to stdout each time it is rendered.
    (idx, idx_types) = Criteria.get_feature_idx_n_idxtypes(feature_type)
    criteria_disease_tags = Criteria.get_all_criteria_disease_tags([feature_id], idx, idx_types)
    return {
        'criteria': criteria_disease_tags,
        'feature_id': feature_id,
        'appname': feature_type,
        'f': feature_doc,
        'section': section,
        'section_title': section_title,
    }
def test_get_feature_idx_n_idxtypes(self):
    '''Check the index name and index types returned for the gene and marker features.'''
    # (feature type, expected index name, index types that must be present)
    cases = (
        ('gene', 'pydgin_imb_criteria_gene',
         ('cand_gene_in_study', 'cand_gene_in_region')),
        ('marker', 'pydgin_imb_criteria_marker',
         ('is_an_index_snp', 'marker_is_gwas_significant_in_study', 'is_marker_in_mhc')),
    )
    for feature_type, expected_idx, expected_types in cases:
        (idx, idx_types) = Criteria.get_feature_idx_n_idxtypes(feature_type)
        self.assertEqual(expected_idx, idx, 'Got the right idx back')
        for expected_type in expected_types:
            self.assertIn(expected_type, idx_types, 'Got the right idx type back')
def get_criteria_details(cls, feature_id, idx=None, idx_type=None, config=None):
    '''Function to get the criteria details for a given feature_id

    @param feature_id: identifier of the feature to look up.
    @param idx: criteria index name; defaults to the index configured under
        '<FEATURE_TYPE>_CRITERIA' in ElasticSettings.
    @param idx_type: comma-separated criteria (index type) names; defaults to
        every criteria available for cls.FEATURE_TYPE.
    @param config: criteria configuration forwarded to get_available_criterias
        when idx_type is not given.
    @return: the result of Criteria.get_criteria_details after being passed
        through Criteria.add_meta_info.
    '''
    if idx is None:
        idx = ElasticSettings.idx(cls.FEATURE_TYPE.upper()+'_CRITERIA')

    if idx_type is None:
        # get all the criterias from ini
        available_criterias = cls.get_available_criterias(feature=cls.FEATURE_TYPE, config=config)
        criteria_list = available_criterias[cls.FEATURE_TYPE]
        idx_type = ','.join(criteria_list)
    elif isinstance(idx_type, str):
        # A caller-supplied idx_type used to leave criteria_list empty, so
        # add_meta_info was called with no criteria names at all. Derive the
        # list from the names that are actually being queried.
        criteria_list = [name.strip() for name in idx_type.split(',') if name.strip()]
    else:
        # NOTE(review): non-string idx_type is assumed to be an iterable of
        # criteria names - confirm against callers.
        criteria_list = list(idx_type)

    result_dict = Criteria.get_criteria_details(feature_id, idx, idx_type)
    result_dict_expanded = Criteria.add_meta_info(idx, criteria_list, result_dict)
    return result_dict_expanded
def get_diseases(self):
    ''' Region override: disease codes tagged against this region's region_id.

    Returns an empty list when the parent get_diseases() yields a falsy value.
    '''
    if not super(RegionDocument, self).get_diseases():
        return []
    idx = ElasticSettings.idx('REGION_CRITERIA')
    tag_docs = Criteria.get_disease_tags(self.region_id, idx=idx)
    return [tag_doc.code for tag_doc in tag_docs]
def test_gene_criteria_types(self):
    """Test if the indexes have records"""
    idx_key = "GENE_CRITERIA"
    idx = ElasticSettings.idx(idx_key)
    idx_types = CriteriaDataIntegrityUtils.get_criteria_index_types(idx_key)
    gene_criterias = Criteria.get_available_criterias("gene")

    CriteriaDataIntegrityTestUtils().test_criteria_types(idx, idx_types, gene_criterias["gene"])
    CriteriaDataIntegrityTestUtils().test_criteria_mappings(idx, idx_types)

    # Fetch one random document of type 'gene_in_region'; the gene types are
    # ['gene_in_region', 'cand_gene_in_region', 'cand_gene_in_study', 'is_gene_in_mhc'].
    random_docs = ElasticUtils.get_rdm_docs(idx, "gene_in_region", size=1)
    self.assertTrue(len(random_docs) == 1, "got back one document")
    gene_in_region_doc = random_docs[0]

    # Example document:
    # {'score': 10, 'CRO': [{'fname': '4p11', 'fid': '4p11_005'}],
    #  '_meta': {'_type': 'gene_in_region', '_score': 0.9997835,
    #            '_index': 'pydgin_imb_criteria_gene', '_id': 'ENSG00000250753'},
    #  'disease_tags': ['CRO'], 'qid': 'ENSG00000250753'}
    qid = gene_in_region_doc.qid
    print(qid)

    # The notes for a disease are stored under the disease code itself, e.g.
    # qid ENSG00000248482, disease_tags ['IBD', 'UC'],
    # notes [{'fid': '5q31.1_013', 'fname': '5q31.1'}]
    disease_tags = gene_in_region_doc.disease_tags
    first_disease_notes = getattr(gene_in_region_doc, disease_tags[0])
    region_id = first_disease_notes[0]["fid"]
    print(region_id)
def get_diseases(self):
    ''' Study override: disease codes tagged against this study's name.

    Returns an empty list when the parent get_diseases() yields a falsy value.
    '''
    if not super(StudyDocument, self).get_diseases():
        return []
    study_name = self.get_name()
    idx = ElasticSettings.idx('STUDY_CRITERIA')
    tag_docs = Criteria.get_disease_tags(study_name, idx=idx)
    return [tag_doc.code for tag_doc in tag_docs]
def test_tag_feature_to_all_diseases(self):
    '''Check that tagging a feature to all diseases yields every available disease.

    Run once for a gene section and once for a marker section.
    '''
    config = IniParser().read_ini(MY_INI_FILE)

    # (ini section, feature id)
    cases = (
        ("is_gene_in_mhc", 'ENSG00000229281'),
        ("is_marker_in_mhc", 'rs6679677'),
    )
    for section, feature_id in cases:
        result = Criteria.tag_feature_to_all_diseases(feature_id, section, config, {})
        result_diseases = sorted(result[feature_id].keys())

        (core_disease, other_disease) = CriteriaManager.get_available_diseases()
        available_diseases = sorted(core_disease + other_disease)

        self.assertEqual(result_diseases, available_diseases)
def test_fetch_overlapping_features(self):
    '''Check that at least one feature overlaps a fixed interval on sequence 1 (build 38).'''
    # ElasticSettings.idx returns '<index>/<type>' here; split it into its parts.
    (region_idx, region_idx_type) = ElasticSettings.idx('REGION', idx_type='STUDY_HITS').split('/')

    overlapping_docs = Criteria.fetch_overlapping_features(
        '38', '1', 206767602, 206772494, region_idx, region_idx_type)

    self.assertTrue(len(overlapping_docs) > 0, 'Got some overlapping features')
def test_get_meta_desc(self):
    '''Check the descriptions Criteria.get_meta_desc returns for two gene criteria.'''
    idx = 'pydgin_imb_criteria_gene'
    meta_info = Criteria.get_meta_desc(idx, ['cand_gene_in_study', 'is_gene_in_mhc'])

    # (criteria name, expected description)
    expected_descriptions = (
        ('is_gene_in_mhc', 'Gene lies in MHC region'),
        ('cand_gene_in_study', 'Candidate Gene for a Study'),
    )
    for criteria_name, description in expected_descriptions:
        self.assertEqual(meta_info[idx][criteria_name], description,
                         'Got the right desc for ' + criteria_name)
def get_all_criteria_disease_tags(cls, qids, idx_type=None):
    '''Get the criteria disease tags for the given qids from this feature type's index.

    When idx_type is None the index types returned by
    get_feature_idx_n_idxtypes for cls.FEATURE_TYPE are used.
    '''
    (idx, default_idx_types) = cls.get_feature_idx_n_idxtypes(cls.FEATURE_TYPE)
    selected_types = default_idx_types if idx_type is None else idx_type
    return Criteria.get_all_criteria_disease_tags(qids, idx, selected_types)
def test_do_identifier_search(self):
    '''Check that a mixed list of identifiers is grouped by feature type.

    Identifiers that are not found are expected under 'missing'.
    '''
    found = Criteria.do_identifier_search(
        ['ptpn22', 'rs2476601', '1p13.2', 'ctla4', 'GDXHsS00025', 'foo', 'bar'])

    expected = {
        'region': {'1p13.2': ['1p13.2_019']},
        'marker': {'rs2476601': ['rs2476601']},
        'gene': {'PTPN22': ['ENSG00000134242'], 'CTLA4': ['ENSG00000163599']},
        'study': {'GDXHsS00025': ['GDXHsS00025']},
        'missing': ['foo', 'bar'],
    }
    self.assertEqual(found, expected, 'Got back correct results')
def process_criterias(cls, feature, criteria=None, config=None, show=False, test=False):
    '''Delegate to the criteria class matching C{feature} and build each requested criteria.

    @param feature: feature type name ('gene', 'marker', 'region' or 'study').
    @param criteria: optional comma-separated criteria names; names that are not
        available for the feature are dropped. None means all available criteria.
    @param config: criteria configuration; when None it is read from
        'test_criteria.ini' (test=True) or 'criteria.ini'.
    @param show: when true, only print and return the criteria that would be processed.
    @param test: forwarded to the criteria helpers and used to pick the ini file.
    @return: the list of criteria to process when show is true, otherwise None.
    '''
    # Imported here rather than at module level, as in the original code.
    from criteria.helper.criteria import Criteria
    from criteria.helper.gene_criteria import GeneCriteria
    from criteria.helper.marker_criteria import MarkerCriteria
    from criteria.helper.region_criteria import RegionCriteria
    from criteria.helper.study_criteria import StudyCriteria

    if config is None:
        ini_file = 'test_criteria.ini' if test else 'criteria.ini'
        config = cls.get_criteria_config(ini_file=ini_file)

    available_criterias = Criteria.get_available_criterias(feature, config=config, test=test)[feature]

    if criteria is None:
        criterias_to_process = available_criterias
    else:
        criterias_to_process = [name.strip() for name in criteria.split(',')
                                if name.strip() in available_criterias]

    if show:
        print(criterias_to_process)
        return criterias_to_process

    # Feature type -> class that builds the criteria index for it.
    criteria_classes = {
        'gene': GeneCriteria,
        'marker': MarkerCriteria,
        'region': RegionCriteria,
        'study': StudyCriteria,
    }
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    logger.debug(datetime.datetime.now().strftime(timestamp_format))
    for section in criterias_to_process:
        criteria_class = criteria_classes.get(feature)
        if criteria_class is None:
            logger.critical('Unsupported feature ... please check the inputs')
            continue
        print('Call to build criteria ' + feature + ' index')
        Criteria.process_criteria(feature, section, config, criteria_class, test=test)

    logger.debug(datetime.datetime.now().strftime(timestamp_format))
    logger.debug('========DONE==========')
def get_available_criterias(cls, feature=None, config=None):
    '''Get the available criterias for a feature, delegating to Criteria.

    config defaults to CriteriaManager.get_criteria_config() and feature
    defaults to cls.FEATURE_TYPE.
    '''
    resolved_config = CriteriaManager.get_criteria_config() if config is None else config
    resolved_feature = cls.FEATURE_TYPE if feature is None else feature
    return Criteria.get_available_criterias(resolved_feature, resolved_config)
def test_get_link_info(self):
    '''Check which feature each gene criteria links to according to Criteria.get_link_info.'''
    idx = 'pydgin_imb_criteria_gene'
    link_info = Criteria.get_link_info(
        idx, ['cand_gene_in_study', 'is_gene_in_mhc', 'cand_gene_in_region'])

    # (criteria name, feature it is expected to link to)
    expected_links = (
        ('cand_gene_in_study', 'study'),
        ('cand_gene_in_region', 'region'),
        ('is_gene_in_mhc', 'gene'),
    )
    for criteria_name, linked_feature in expected_links:
        self.assertEqual(link_info[idx][criteria_name], linked_feature,
                         'Got the right link to feature for ' + criteria_name)
def test_marker_criteria_types(self):
    '''Test if the marker criteria indexes have records'''
    idx_key = 'MARKER_CRITERIA'
    feature_type = 'marker'
    idx = ElasticSettings.idx(idx_key)
    idx_types = CriteriaDataIntegrityUtils.get_criteria_index_types(idx_key)

    # The criteria were requested for 'marker', so they must be read back under
    # that same key. The previous code indexed the result with 'gene'
    # (copy-paste from the gene test).
    marker_criterias = Criteria.get_available_criterias(feature_type)
    CriteriaDataIntegrityTestUtils().test_criteria_types(idx, idx_types,
                                                         marker_criterias[feature_type])
    CriteriaDataIntegrityTestUtils().test_criteria_mappings(idx, idx_types)

    # get a random doc for one of the marker types
    idx_type = 'rsq_with_index_snp'
    doc_by_idx_type = ElasticUtils.get_rdm_docs(idx, idx_type, size=1)
    self.assertTrue(len(doc_by_idx_type) == 1, 'got back one document')
def test_get_criteria_disease_dict(self):
    '''Check how Criteria.get_criteria_disease_dict accumulates entries per disease.

    Steps: add one study, add it again (no duplicate expected), add a second
    study, then add the second study for an additional disease.
    '''
    # Step 1: a single study tagged with T1D, starting from an empty dict.
    barrett = Criteria.get_criteria_dict('GDXHsS00004', 'Barrett')
    accumulated = Criteria.get_criteria_disease_dict(['T1D'], barrett, {})
    only_barrett = {'T1D': [{'fname': 'Barrett', 'fid': 'GDXHsS00004'}]}
    self.assertEqual(accumulated, only_barrett, 'Dict as expected')

    # Step 2: the same study again must leave the dict unchanged.
    accumulated = Criteria.get_criteria_disease_dict(['T1D'], barrett, accumulated)
    self.assertEqual(accumulated, only_barrett, 'Dict as expected after addding duplicate')

    # Step 3: a different study is appended to the T1D list.
    catfield = Criteria.get_criteria_dict('GDXHsS00005', 'Catfield')
    accumulated = Criteria.get_criteria_disease_dict(['T1D'], catfield, accumulated)
    both_studies = {'T1D': [{'fname': 'Barrett', 'fid': 'GDXHsS00004'},
                            {'fname': 'Catfield', 'fid': 'GDXHsS00005'}]}
    self.assertEqual(accumulated, both_studies, 'Dict as expected after adding new')

    # Step 4: the second study for T1D and MS; only the MS entry is new.
    accumulated = Criteria.get_criteria_disease_dict(['T1D', 'MS'], catfield, accumulated)
    with_ms = {'MS': [{'fname': 'Catfield', 'fid': 'GDXHsS00005'}],
               'T1D': [{'fname': 'Barrett', 'fid': 'GDXHsS00004'},
                       {'fname': 'Catfield', 'fid': 'GDXHsS00005'}]}
    self.assertEqual(accumulated, with_ms, 'Dict as expected after adding diseases')
def test_calculate_score(self):
    '''Check the score Criteria.calculate_score gives for the disease list ["AA", "T1D"].'''
    computed_score = Criteria.calculate_score(["AA", "T1D"])
    self.assertEqual(computed_score, 15, "Got back the right score")
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from elastic.

    Three modes, chosen from the filterable GET parameters:
      - feature_id with aggregate == 'true': one object holding the disease
        codes tagged against the feature;
      - feature_id with detail == 'true': one object per criteria with its
        disease tags and feature details;
      - otherwise: a paginated match-all search over the criteria index, which
        also sets view.es_count to the total hit count.

    @param request: the incoming request; its GET parameters supply the filters.
    @param queryset: unused here.
    @param view: the view, providing the paginator and optional 'filter_fields'.
    @return: list of ElasticObject instances.
    '''
    q_size = view.paginator.get_limit(request)
    q_from = view.paginator.get_offset(request)

    # Only GET parameters the view declares as filterable are honoured.
    filterable = getattr(view, 'filter_fields', [])
    filters = {k: v for k, v in request.GET.items() if k in filterable}

    criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))
    feature_type = filters.get('feature_type')
    feature_id = filters.get('feature_id')
    aggregate = filters.get('aggregate')
    detail = filters.get('detail')

    # _get_index may give back several index keys; search them all in that case.
    if isinstance(criteria_idx, list):
        idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
    else:
        idx = ElasticSettings.idx(criteria_idx)

    results = []
    if feature_id and aggregate == 'true':
        disease_doc_tags = Criteria.get_disease_tags(feature_id, idx=idx)
        new_obj = ElasticObject()
        new_obj.qid = feature_id
        new_obj.disease_tags = [getattr(d, 'code') for d in disease_doc_tags]
        new_obj.criteria_type = None
        results.append(new_obj)
        return results

    if feature_id and detail == 'true':
        # NOTE(review): feature_type is required in this mode; when it is
        # missing filters.get() returns None and .lower() raises AttributeError.
        (idx, idx_types) = Criteria.get_feature_idx_n_idxtypes(feature_type.lower())
        criteria_details = Criteria.get_criteria_details(feature_id, idx=idx, idx_type=idx_types)
        criteria_list = idx_types.split(',')
        criteria_details_expanded = Criteria.add_meta_info(idx, criteria_list, criteria_details)
        feature_details = self._get_feature_details(criteria_details_expanded)

        for criteria, details in feature_details.items():
            new_obj = ElasticObject()
            new_obj.qid = details['qid']
            new_obj.criteria_type = criteria
            new_obj.disease_tags = details['disease_tags']
            new_obj.feature_details = list(details['fdetails'])
            results.append(new_obj)
        return results

    # Default: page through every document in the index.
    q = ElasticQuery(Query.match_all())
    s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
    json_results = s.get_json_response()
    for result in json_results['hits']['hits']:
        new_obj = ElasticObject(initial=result['_source'])
        new_obj.uuid = result['_id']
        new_obj.criteria_type = result['_type']
        results.append(new_obj)
    view.es_count = json_results['hits']['total']
    return results
def get_diseases(self):
    ''' Marker override: disease codes tagged against this marker's name.

    Returns an empty list when the parent get_diseases() yields a falsy value.
    '''
    if not super(MarkerDocument, self).get_diseases():
        return []
    marker_name = self.get_name()
    idx = ElasticSettings.idx('MARKER_CRITERIA')
    tag_docs = Criteria.get_disease_tags(marker_name, idx=idx)
    return [tag_doc.code for tag_doc in tag_docs]
def get_diseases(self):
    ''' Gene override: disease codes tagged against this gene's ensembl id.

    The id is read from the 'ensembl' entry of the document's dbxrefs.
    Returns an empty list when the parent get_diseases() yields a falsy value.
    '''
    if not super(GeneDocument, self).get_diseases():
        return []
    ensembl_id = self.dbxrefs['ensembl']
    idx = ElasticSettings.idx('GENE_CRITERIA')
    tag_docs = Criteria.get_disease_tags(ensembl_id, idx=idx)
    return [tag_doc.code for tag_doc in tag_docs]
def test_do_criteria_search(self):
    '''Run Criteria.do_criteria_search on a mixed identifier list and print the result.

    There are no assertions; the test only fails if the call raises.
    '''
    mixed_identifiers = ['ptpn22', 'rs2476601', '1p13.2', 'ctla4', 'GDXHsS00025', 'foo', 'bar']
    print(Criteria.do_criteria_search(mixed_identifiers))
def get_disease_tags(cls, feature_id, idx_type=None):
    '''Get the disease docs for a feature_id, delegating to the parent class Criteria.

    The index is the one configured under '<FEATURE_TYPE>_CRITERIA' in ElasticSettings.
    '''
    idx_key = cls.FEATURE_TYPE.upper() + '_CRITERIA'
    return Criteria.get_disease_tags(feature_id, ElasticSettings.idx(idx_key), idx_type)
def get_disease_codes_from_results(cls, criteria_results):
    '''Return the sorted disease codes Criteria extracts from criteria_results.

    The index is the one configured under '<FEATURE_TYPE>_CRITERIA' in ElasticSettings.
    '''
    idx_key = cls.FEATURE_TYPE.upper() + '_CRITERIA'
    disease_codes = Criteria.get_disease_codes_from_results(ElasticSettings.idx(idx_key),
                                                            criteria_results)
    return sorted(disease_codes)
def test_get_meta_info(self):
    '''Check the 'desc' entry Criteria.get_meta_info returns for cand_gene_in_study.'''
    gene_study_meta = Criteria.get_meta_info('pydgin_imb_criteria_gene', 'cand_gene_in_study')
    self.assertEqual(gene_study_meta['desc'], 'Candidate Gene for a Study',
                     'Got the right meta info')