def gene_mapping(cls, idx, idx_type, test_mode=False):
    ''' Build the mapping for the gene index and, unless test_mode is set,
    load it.

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: name of the index type the mapping is built for
    @type  test_mode: boolean
    @param test_mode: when true the mapping is built but not loaded
    @return: the MappingProperties built for idx_type
    '''
    gene_props = MappingProperties(idx_type)
    (gene_props
     .add_property("symbol", "string", analyzer="full_name")
     .add_property("synonyms", "string", analyzer="full_name")
     .add_property("chromosome", "string")
     .add_property("source", "string")
     .add_property("start", "long")
     .add_property("stop", "long")
     .add_property("strand", "string")
     .add_property("description", "string")
     .add_property("biotype", "string")
     .add_property("pmids", "string")
     .add_property("suggest", "completion", analyzer="full_name"))

    # dbxrefs carries an "orthologs" sub-mapping, which in turn holds one
    # sub-mapping per species (mmusculus, rnorvegicus).
    dbxrefs = cls._get_nested_prop("dbxrefs", "ensembl")
    orthologs = MappingProperties("orthologs")
    for species in ("mmusculus", "rnorvegicus"):
        orthologs.add_properties(cls._get_nested_prop(species, "ensembl"))
    dbxrefs.add_properties(orthologs)
    gene_props.add_properties(dbxrefs)

    tag_props = MappingProperties("tags")
    tag_props.add_property("weight", "integer", index="not_analyzed")
    gene_props.add_properties(tag_props)

    # create index and add mapping (skipped in test mode)
    loader = Loader()
    if not test_mode:
        loader.mapping(gene_props, idx_type,
                       analyzer=Loader.KEYWORD_ANALYZER,
                       indexName=idx, shards=5)
    return gene_props
def _load_interaction_mappings(cls, section):
    ''' Load the mapping for the "interactions" index type.

    The mapping is created with "gene" as the second MappingProperties
    argument (presumably the parent type - confirm against
    MappingProperties).

    @param section: configuration section; its 'index' entry names the
                    index the mapping is loaded into
    @return: the status returned by Loader.mapping
    '''
    mapping_props = MappingProperties("interactions", "gene")
    for field, field_type in (("interactors", "object"),
                              ("interaction_source", "string")):
        mapping_props.add_property(field, field_type)

    loader = Loader()
    index_name = section['index']
    return loader.mapping(mapping_props, "interactions",
                          analyzer=Loader.KEYWORD_ANALYZER,
                          indexName=index_name, shards=1)
def mapping(cls, idx, idx_type):
    ''' Create and load the mapping for the chromosome type in the bands
    index.

    Mapped fields:
      seqid  - chromosome (string, not analyzed)
      length - sequence length (integer)

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: name of the index type the mapping is built for
    @return: the MappingProperties built for idx_type
    '''
    chrom_props = MappingProperties(idx_type)
    (chrom_props
     .add_property("seqid", "string", index="not_analyzed")
     .add_property("length", "integer"))

    # create index and add mapping
    Loader().mapping(chrom_props, idx_type,
                     analyzer=Loader.KEYWORD_ANALYZER,
                     indexName=idx, shards=1)
    return chrom_props
def _load_pathway_mappings(cls, section):
    ''' Load the elastic mapping for the pathway index type.

    Every mapped field is a plain "string" property.

    @param section: configuration section; 'index' names the index and
                    'index_type' names the type the mapping is built for
    @return: the status returned by Loader.mapping
    '''
    index_name = section['index']
    index_type = section['index_type']

    mapping_props = MappingProperties(index_type)
    for field in ("pathway_name", "pathway_url", "gene_sets",
                  "source", "is_public"):
        mapping_props.add_property(field, "string")

    return Loader().mapping(mapping_props, index_type,
                            indexName=index_name, shards=1)
def gene_history_mapping(cls, idx, idx_type, test_mode=False):
    ''' Build the mapping for the gene history type and, unless test_mode
    is set, load it.

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: name of the index type the mapping is built for
    @type  test_mode: boolean
    @param test_mode: when true the mapping is built but not loaded
    @return: the MappingProperties built for idx_type
    '''
    history_props = MappingProperties(idx_type)
    (history_props
     .add_property("geneid", "integer")
     .add_property("discontinued_geneid", "integer")
     .add_property("discontinued_symbol", "string", analyzer="full_name")
     .add_property("discontinue_date", "date"))

    # create index and add mapping (skipped in test mode)
    loader = Loader()
    if not test_mode:
        loader.mapping(history_props, idx_type,
                       analyzer=Loader.KEYWORD_ANALYZER,
                       indexName=idx, shards=5)
    return history_props
def mapping(cls, idx, idx_type):
    ''' Create and load the mapping for disease indexing.

    NOTE: idx_type is accepted but not used here; the mapping is always
    built for, and loaded as, the "disease" type. Nothing is returned.

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: unused
    '''
    disease_props = MappingProperties("disease")
    (disease_props
     .add_property("name", "string")
     .add_property("code", "string")
     .add_property("description", "string", index="not_analyzed")
     .add_property("colour", "string", index="not_analyzed")
     .add_property("tier", "integer", index="not_analyzed")
     .add_property("suggest", "completion", analyzer="full_name"))

    tag_props = MappingProperties("tags")
    tag_props.add_property("weight", "integer", index="not_analyzed")
    disease_props.add_properties(tag_props)

    # create index and add mapping
    Loader().mapping(disease_props, 'disease',
                     analyzer=Loader.KEYWORD_ANALYZER,
                     indexName=idx, shards=1)
def mapping(cls, idx, idx_type, meta):
    ''' Create and load the mapping for gwas/ic stats indexing.

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: name of the index type the mapping is built for
    @param meta: meta information passed straight through to the loader
    '''
    # (property name, property type, extra keyword arguments)
    field_specs = (
        ("seqid", "string", {}),
        ("position", "integer", {}),
        ("marker", "string", {"index": "not_analyzed"}),
        ("p_value", "double", {}),
        ("odds_ratio", "float", {}),
        ("lower_or", "float", {"index": "no"}),
        ("upper_or", "float", {"index": "no"}),
        ("raf", "float", {"index": "no"}),
        ("risk_allele", "string", {"index": "no"}),
        ("alt_allele", "string", {"index": "no"}),
        ("imputed", "byte", {"index": "no"}),
    )

    stats_props = MappingProperties(idx_type)
    for field, field_type, extra in field_specs:
        stats_props.add_property(field, field_type, **extra)

    # create index and add mapping
    Loader().mapping(stats_props, idx_type, meta=meta,
                     indexName=idx, shards=5)
def mapping(cls, idx, idx_type):
    ''' Create and load the mapping for the recombination rates index.

    Mapped fields (all with index="not_analyzed"):
      seqid                - chromosome (string)
      position             - integer
      recombination_rate   - float
      genetic_map_position - float

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: name of the index type the mapping is built for
    @return: the MappingProperties built for idx_type
    '''
    rate_props = MappingProperties(idx_type)
    (rate_props
     .add_property("seqid", "string", index="not_analyzed")
     .add_property("position", "integer", index="not_analyzed")
     .add_property("recombination_rate", "float", index="not_analyzed")
     .add_property("genetic_map_position", "float", index="not_analyzed"))

    # create index and add mapping
    Loader().mapping(rate_props, idx_type, indexName=idx, shards=2)
    return rate_props
def create_criteria_mapping(cls, idx, idx_type, test_mode=False):
    ''' Build the mapping for a criteria index type and, unless test_mode
    is set, load it.

    The mapping holds the top-level score/disease_tags/qid fields plus one
    sub-mapping per disease code reported by
    CriteriaManager().get_available_diseases(); each of those carries
    fid, fname and an "fnotes" sub-mapping of link fields.

    @type  idx: string
    @param idx: name of the index
    @type  idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    @type  test_mode: boolean
    @param test_mode: when true the mapping is built but not loaded
    @return: the MappingProperties built for idx_type
    '''
    logger.warning('Idx ' + idx)
    logger.warning('Idx_type ' + idx_type)

    criteria_props = MappingProperties(idx_type)
    criteria_props.add_property("score", "integer")
    for field in ("disease_tags", "qid"):
        criteria_props.add_property(field, "string", index="not_analyzed")

    main_codes, other_codes = CriteriaManager().get_available_diseases()
    for code in main_codes + other_codes:
        disease_props = MappingProperties(code)
        for field in ("fid", "fname"):
            disease_props.add_property(field, "string", index="not_analyzed")

        note_props = MappingProperties('fnotes')
        for field in ('linkid', 'linkname', 'linkdata', 'linkvalue'):
            note_props.add_property(field, "string", index="not_analyzed")

        disease_props.add_properties(note_props)
        criteria_props.add_properties(disease_props)

    # create index and add mapping (skipped in test mode)
    loader = Loader()

    # meta info: the description configured for this criteria type
    criteria_config = CriteriaManager.get_criteria_config()
    meta = {"desc": criteria_config[idx_type]['desc']}

    if not test_mode:
        loader.mapping(criteria_props, idx_type, meta=meta,
                       analyzer=Loader.KEYWORD_ANALYZER,
                       indexName=idx, shards=5)
    return criteria_props
def mapping(cls, idx, idx_type):
    ''' Create and load the mapping for the cytobands index.

    Mapped fields:
      seqid    - chromosome (string, not analyzed)
      start    - start position (integer)
      stop     - stop position (integer)
      name     - cytoband name (string, "full_name" analyzer)
      giestain - Giemsa stain results (string, not analyzed), e.g.
                 gneg, gpos50, gpos75, gpos25, gpos100, acen, gvar, stalk

    @type  idx: string
    @param idx: name of the index (handed to the loader as indexName)
    @type  idx_type: string
    @param idx_type: name of the index type the mapping is built for
    @return: the MappingProperties built for idx_type
    '''
    band_props = MappingProperties(idx_type)
    (band_props
     .add_property("seqid", "string", index="not_analyzed")
     .add_property("start", "integer")
     .add_property("stop", "integer")
     .add_property("name", "string", analyzer="full_name")
     .add_property("giestain", "string", index="not_analyzed"))

    # create index and add mapping
    Loader().mapping(band_props, idx_type,
                     analyzer=Loader.KEYWORD_ANALYZER,
                     indexName=idx, shards=2)
    return band_props
def test_mapping_parent_child(self):
    ''' Test creating mapping with parent child relationship.

    Creates a throw-away index with a "gene" type and a "publication"
    type whose mapping names "gene" as its second argument, bulk loads one
    document of each, and checks the has_parent / has_child queries.
    '''
    gene_mapping = MappingProperties("gene")
    gene_mapping.add_property("symbol", "string", analyzer="full_name")
    inta_mapping = MappingProperties("publication", "gene")
    load = Loader()
    idx = "test__mapping__" + SEARCH_SUFFIX
    options = {"indexName": idx, "shards": 1}
    idx_url = ElasticSettings.url() + '/' + idx

    # Start from a clean slate, and register the final delete as a cleanup
    # so the index is removed even when an assertion below fails.
    requests.delete(idx_url)
    self.addCleanup(requests.delete, idx_url)

    # add child mappings first
    status = load.mapping(inta_mapping, "publication",
                          analyzer=Loader.KEYWORD_ANALYZER, **options)
    self.assertTrue(status, "mapping inteactions")
    status = load.mapping(gene_mapping, "gene",
                          analyzer=Loader.KEYWORD_ANALYZER, **options)
    self.assertTrue(status, "mapping genes")

    # load docs and test has parent query
    json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
    json_data += json.dumps({"symbol": "PAX1"}) + '\n'
    json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
    json_data += json.dumps({"pubmed": 1234}) + '\n'
    Bulk.load(idx, '', json_data)
    Search.index_refresh(idx)

    query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
    elastic = Search(query, idx=idx, idx_type='publication', size=500)
    docs = elastic.search().docs
    self.assertEqual(len(docs), 1)
    self.assertEqual(docs[0].pubmed, 1234)
    self.assertEqual(docs[0].parent(), '1')
    self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

    # test has child query
    query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
    elastic = Search(query, idx=idx, idx_type='gene', size=500)
    docs = elastic.search().docs
    self.assertEqual(len(docs), 1)
    self.assertEqual(docs[0].symbol, 'PAX1')
    self.assertIsNone(docs[0].parent())
def test_mapping_error(self):
    ''' Check that passing a plain string where a MappingProperties is
    expected raises LoaderError from Loader.mapping and MappingError from
    MappingProperties.add_properties. '''
    loader = Loader()
    with self.assertRaises(LoaderError):
        loader.mapping('MappingProperties', '')

    empty_props = MappingProperties('')
    with self.assertRaises(MappingError):
        empty_props.add_properties('MappingProperties')