Esempio n. 1
0
    def gene_mapping(cls, idx, idx_type, test_mode=False):
        '''Build the mapping for the gene index and, unless testing, load it.

        @type  idx: string
        @param idx: index name, handed to the loader as "indexName"
        @type  idx_type: string
        @param idx_type: index type the mapping properties are created for
        @type  test_mode: boolean
        @param test_mode: when True the mapping is built but Loader.mapping
                          is not called
        @return: the MappingProperties object that was built
        '''
        gene_props = MappingProperties(idx_type)

        # Name fields use the "full_name" analyzer.
        gene_props.add_property("symbol", "string", analyzer="full_name")
        gene_props.add_property("synonyms", "string", analyzer="full_name")

        # Remaining simple fields as (name, type), kept in declaration order.
        simple_fields = (
            ("chromosome", "string"),
            ("source", "string"),
            ("start", "long"),
            ("stop", "long"),
            ("strand", "string"),
            ("description", "string"),
            ("biotype", "string"),
            ("pmids", "string"),
        )
        for field_name, field_type in simple_fields:
            gene_props.add_property(field_name, field_type)

        gene_props.add_property("suggest", "completion",
                                index_analyzer="full_name", search_analyzer="full_name")

        # dbxrefs carries an "ensembl" entry plus an "orthologs" group with
        # one "ensembl" entry per organism.
        dbxrefs = cls._get_nested_prop("dbxrefs", "ensembl")
        orthologs = MappingProperties("orthologs")
        for organism in ("mmusculus", "rnorvegicus"):
            orthologs.add_properties(cls._get_nested_prop(organism, "ensembl"))
        dbxrefs.add_properties(orthologs)
        gene_props.add_properties(dbxrefs)

        # create index and add mapping
        loader = Loader()
        if not test_mode:
            loader.mapping(gene_props, idx_type, analyzer=Loader.KEYWORD_ANALYZER,
                           indexName=idx, shards=5)
        return gene_props
 def _load_interaction_mappings(cls, section):
     '''Build the "interactions" mapping and load it through a Loader.

     The mapping is created as MappingProperties("interactions", "gene");
     the second argument is presumably the parent type - confirm against
     MappingProperties.

     @param section: config section; its 'index' entry names the index
     @return: the value returned by Loader.mapping
     '''
     mapping_props = MappingProperties("interactions", "gene")
     for field_name, field_type in (("interactors", "object"),
                                    ("interaction_source", "string")):
         mapping_props.add_property(field_name, field_type)

     loader = Loader()
     return loader.mapping(mapping_props, "interactions", analyzer=Loader.KEYWORD_ANALYZER,
                           indexName=section['index'], shards=1)
Esempio n. 3
0
 def _create_rs_merge_mapping(self, idx_type, **options):
     '''Build the rs merge mapping for idx_type and pass it to self.mapping.

     Any extra keyword arguments are forwarded unchanged to self.mapping.
     Returns the MappingProperties object that was built.
     '''
     timestamp_format = "yyyy-MM-dd HH:mm:ss.SSS"
     # (field name, field type, extra add_property keywords), in order.
     field_specs = (
         ("rshigh", "string", {"index": "not_analyzed"}),
         ("rslow", "string", {"index": "not_analyzed"}),
         ("build_id", "integer", {"index": "no"}),
         ("orien", "integer", {"index": "no"}),
         ("create_time", "date", {"index": "no", "property_format": timestamp_format}),
         ("last_updated_time", "date", {"index": "no", "property_format": timestamp_format}),
         ("rscurrent", "string", {"index": "not_analyzed"}),
         ("orien2current", "string", {"index": "no"}),
         ("notes", "string", {"index": "no"}),
     )
     rs_props = MappingProperties(idx_type)
     for field_name, field_type, extras in field_specs:
         rs_props.add_property(field_name, field_type, **extras)
     self.mapping(rs_props, idx_type, **options)
     return rs_props
Esempio n. 4
0
 def _create_gff_mapping(self, idx_type, **options):
     '''Build the gff mapping for idx_type and pass it to self.mapping.

     Any extra keyword arguments are forwarded unchanged to self.mapping.
     Returns the MappingProperties object that was built.
     '''
     gff_props = MappingProperties(idx_type)
     gff_props.add_property("seqid", "string", index="not_analyzed")
     gff_props.add_property("source", "string")
     gff_props.add_property("type", "string", index="not_analyzed")
     gff_props.add_property("start", "integer", index="not_analyzed")
     gff_props.add_property("end", "integer", index="not_analyzed")
     # score, strand and phase are all declared as strings with index="no".
     for unindexed in ("score", "strand", "phase"):
         gff_props.add_property(unindexed, "string", index="no")
     gff_props.add_property("attr", "object")

     self.mapping(gff_props, idx_type, **options)
     return gff_props
Esempio n. 5
0
    def mapping(cls, idx, idx_type):
        '''Build and load the mapping for the chromosome type in the bands index.

        Fields:
        seqid    - chromosome
        length   - sequence length

        @param idx: index name, handed to the loader as "indexName"
        @param idx_type: index type the mapping is created for
        @return: the MappingProperties object that was built
        '''
        chrom_props = MappingProperties(idx_type)
        chrom_props.add_property("seqid", "string", index="not_analyzed")
        chrom_props.add_property("length", "integer")

        # create index and add mapping
        loader = Loader()
        loader.mapping(chrom_props, idx_type, analyzer=Loader.KEYWORD_ANALYZER,
                       indexName=idx, shards=1)
        return chrom_props
Esempio n. 6
0
    def gene_history_mapping(cls, idx, idx_type, test_mode=False):
        '''Build the gene history mapping and, unless testing, load it.

        @type  idx: string
        @param idx: index name, handed to the loader as "indexName"
        @type  idx_type: string
        @param idx_type: index type the mapping properties are created for
        @type  test_mode: boolean
        @param test_mode: when True the mapping is built but Loader.mapping
                          is not called
        @return: the MappingProperties object that was built
        '''
        history_props = MappingProperties(idx_type)
        history_props.add_property("geneid", "integer")
        history_props.add_property("discontinued_geneid", "integer")
        history_props.add_property("discontinued_symbol", "string", analyzer="full_name")
        history_props.add_property("discontinue_date", "date")

        # create index and add mapping
        loader = Loader()
        if not test_mode:
            loader.mapping(history_props, idx_type, analyzer=Loader.KEYWORD_ANALYZER,
                           indexName=idx, shards=5)
        return history_props
Esempio n. 7
0
    def _create_mapping(self, **options):
        '''Build the "gene" mapping used for gene names and pass it to self.mapping.

        Any keyword arguments are forwarded unchanged to self.mapping.
        Nothing is returned.
        '''
        gene_props = MappingProperties("gene")
        gene_props.add_property("gene_symbol", "string", analyzer="full_name")
        gene_props.add_property("biotype", "string")
        gene_props.add_property("synonyms", "string", analyzer="full_name")
        gene_props.add_property("hgnc", "string")
        gene_props.add_property("dbxrefs", "object")
        gene_props.add_property("organism", "string")

        # The "featureloc" group is built separately and attached to the gene
        # mapping.
        location_props = MappingProperties("featureloc")
        for field_name, field_type in (("start", "integer"),
                                       ("end", "integer"),
                                       ("seqid", "string"),
                                       ("build", "string")):
            location_props.add_property(field_name, field_type)
        gene_props.add_properties(location_props)

        # create index and add mapping
        self.mapping(gene_props, 'gene', analyzer=self.KEYWORD_ANALYZER, **options)
Esempio n. 8
0
 def _create_snp_mapping(self, idx_type, **options):
     '''Build the snp mapping for idx_type and pass it to self.mapping.

     Any extra keyword arguments are forwarded unchanged to self.mapping.
     Returns the MappingProperties object that was built.
     '''
     snp_props = MappingProperties(idx_type)
     snp_props.add_property("seqid", "string", index="not_analyzed")
     snp_props.add_property("start", "integer", index="not_analyzed")
     snp_props.add_property("id", "string", analyzer="full_name")
     # These columns are all declared as strings with index="no".
     for unindexed in ("ref", "alt", "qual", "filter", "info"):
         snp_props.add_property(unindexed, "string", index="no")
     snp_props.add_property("suggest", "completion", analyzer="full_name")

     # "tags" group holding a "weight" field.
     tag_props = MappingProperties("tags")
     tag_props.add_property("weight", "integer", index="not_analyzed")
     snp_props.add_properties(tag_props)

     self.mapping(snp_props, idx_type, analyzer=Loader.KEYWORD_ANALYZER, **options)
     return snp_props
Esempio n. 9
0
    def mapping(cls, idx, idx_type):
        '''Build and load the mapping for the cytobands index.

        Fields:
        seqid    - chromosome
        start    - start position
        stop     - stop position
        name     - cytoband name
        gieStain - Giemsa stain results. gneg, gpos50, gpos75, gpos25, gpos100, acen, gvar, stalk
                   (stored under the lower-case key "giestain")

        @param idx: index name, handed to the loader as "indexName"
        @param idx_type: index type the mapping is created for
        @return: the MappingProperties object that was built
        '''
        band_props = MappingProperties(idx_type)
        band_props.add_property("seqid", "string", index="not_analyzed")
        band_props.add_property("start", "integer")
        band_props.add_property("stop", "integer")
        band_props.add_property("name", "string", analyzer="full_name")
        band_props.add_property("giestain", "string", index="not_analyzed")

        # create index and add mapping
        loader = Loader()
        loader.mapping(band_props, idx_type, analyzer=Loader.KEYWORD_ANALYZER,
                       indexName=idx, shards=2)
        return band_props
Esempio n. 10
0
    def _create_gene_mapping(self, **options):
        '''Build the "gene_target" mapping and pass it to self.mapping.

        Besides the fixed fields, one "float" field is added for each entry
        of GeneTargetManager.tissue_types, and each of those names is
        recorded in the meta dictionary under "tissue_type".
        Any keyword arguments are forwarded unchanged to self.mapping.
        Nothing is returned.
        '''
        # (field name, field type, index setting), in declaration order.
        fixed_fields = (
            ("ensg", "string", "not_analyzed"),
            ("name", "string", "not_analyzed"),
            ("biotype", "string", "not_analyzed"),
            ("strand", "string", "no"),
            ("baitChr", "string", "not_analyzed"),
            ("baitStart", "integer", "not_analyzed"),
            ("baitEnd", "integer", "not_analyzed"),
            ("baitID", "string", "no"),
            ("baitName", "string", "no"),
            ("oeChr", "string", "not_analyzed"),
            ("oeStart", "integer", "not_analyzed"),
            ("oeEnd", "integer", "not_analyzed"),
            ("oeID", "string", "no"),
            ("oeName", "string", "no"),
            ("dist", "integer", "not_analyzed"),
        )
        target_props = MappingProperties("gene_target")
        for field_name, field_type, index_setting in fixed_fields:
            target_props.add_property(field_name, field_type, index=index_setting)

        tissue_meta = {}
        for tissue in GeneTargetManager.tissue_types:
            target_props.add_property(tissue, "float")
            tissue_meta[tissue] = "tissue_type"

        self.mapping(target_props, idx_type='gene_target',
                     meta={"tissue_type": tissue_meta}, **options)
    def test_mapping_parent_child(self):
        ''' Test creating mapping with parent child relationship.

        Creates a throw-away index holding a "gene" type and a "publication"
        type declared with "gene" as second MappingProperties argument, loads
        one document of each, then checks has_parent and has_child queries.
        The index is deleted before the test and again afterwards, even when
        an assertion fails.
        '''
        gene_mapping = MappingProperties("gene")
        gene_mapping.add_property("symbol", "string", analyzer="full_name")
        inta_mapping = MappingProperties("publication", "gene")
        load = Loader()
        idx = "test__mapping__"+SEARCH_SUFFIX
        options = {"indexName": idx, "shards": 1}
        idx_url = ElasticSettings.url() + '/' + idx
        # start from a clean slate in case an earlier run left the index behind
        requests.delete(idx_url)

        try:
            # add child mappings first
            status = load.mapping(inta_mapping, "publication", analyzer=Loader.KEYWORD_ANALYZER, **options)
            self.assertTrue(status, "mapping inteactions")
            status = load.mapping(gene_mapping, "gene", analyzer=Loader.KEYWORD_ANALYZER, **options)
            self.assertTrue(status, "mapping genes")

            # load docs and test has parent query
            json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
            json_data += json.dumps({"symbol": "PAX1"}) + '\n'
            json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
            json_data += json.dumps({"pubmed": 1234}) + '\n'
            Bulk.load(idx, '', json_data)
            Search.index_refresh(idx)
            query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
            elastic = Search(query, idx=idx, idx_type='publication', size=500)
            docs = elastic.search().docs
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(len(docs), 1)
            self.assertEqual(getattr(docs[0], 'pubmed'), 1234)
            self.assertEqual(docs[0].parent(), '1')
            self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

            # test has child query
            query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
            elastic = Search(query, idx=idx, idx_type='gene', size=500)
            docs = elastic.search().docs
            self.assertEqual(len(docs), 1)
            self.assertEqual(getattr(docs[0], 'symbol'), 'PAX1')
            self.assertIsNone(docs[0].parent())
        finally:
            # always remove the test index so a failing assertion does not leak it
            requests.delete(idx_url)
Esempio n. 12
0
    def mapping(cls, idx, idx_type):
        '''Build and load the mapping for disease indexing.

        The mapping is always created and loaded under the fixed type name
        "disease"; the idx_type argument is accepted but not used here.

        @param idx: index name, handed to the loader as "indexName"
        @param idx_type: unused by this method
        '''
        disease_props = MappingProperties("disease")
        disease_props.add_property("name", "string")
        disease_props.add_property("code", "string")
        disease_props.add_property("description", "string", index="not_analyzed")
        disease_props.add_property("colour", "string", index="not_analyzed")
        disease_props.add_property("tier", "integer", index="not_analyzed")
        disease_props.add_property("suggest", "completion", analyzer="full_name")

        # "tags" group holding a "weight" field.
        tag_props = MappingProperties("tags")
        tag_props.add_property("weight", "integer", index="not_analyzed")
        disease_props.add_properties(tag_props)

        loader = Loader()
        loader.mapping(disease_props, 'disease', analyzer=Loader.KEYWORD_ANALYZER,
                       indexName=idx, shards=1)
Esempio n. 13
0
 def _create_json_mapping(self, idx_type, mapping, **options):
     '''Create a mapping for idx_type from an already prepared dictionary.

     The entries of mapping are merged, via dict-style update, into the
     idx_type entry of a fresh MappingProperties object, which is then
     passed to self.mapping together with any extra keyword arguments.
     Nothing is returned.
     '''
     json_props = MappingProperties(idx_type)
     type_entry = json_props.mapping_properties[idx_type]
     type_entry.update(mapping)
     self.mapping(json_props, idx_type, **options)
Esempio n. 14
0
    def mapping(cls, idx, idx_type, meta):
        '''Build and load the mapping for gwas/ic stats indexing.

        @param idx: index name, handed to the loader as "indexName"
        @param idx_type: index type the mapping is created for
        @param meta: passed through to Loader.mapping as its meta argument
        '''
        # (field name, field type, extra add_property keywords), in order.
        field_specs = (
            ("seqid", "string", {}),
            ("position", "integer", {}),
            ("marker", "string", {"index": "not_analyzed"}),
            ("p_value", "double", {}),
            ("odds_ratio", "float", {}),
            ("lower_or", "float", {"index": "no"}),
            ("upper_or", "float", {"index": "no"}),
            ("raf", "float", {"index": "no"}),
            ("risk_allele", "string", {"index": "no"}),
            ("alt_allele", "string", {"index": "no"}),
            ("imputed", "byte", {"index": "no"}),
        )
        stats_props = MappingProperties(idx_type)
        for field_name, field_type, extras in field_specs:
            stats_props.add_property(field_name, field_type, **extras)

        loader = Loader()
        loader.mapping(stats_props, idx_type, meta=meta, indexName=idx, shards=5)
 def _load_pathway_mappings(cls, section):
     '''Build the pathway mapping and load it through a Loader.

     @param section: config section; 'index' names the index and
                     'index_type' names the type the mapping is built for
     @return: the value returned by Loader.mapping
     '''
     index_name = section['index']
     index_type = section['index_type']

     pathway_props = MappingProperties(index_type)
     # Every pathway field is declared as a plain string.
     for field_name in ("pathway_name", "pathway_url", "gene_sets", "source", "is_public"):
         pathway_props.add_property(field_name, "string")

     loader = Loader()
     return loader.mapping(pathway_props, index_type, indexName=index_name, shards=1)
Esempio n. 16
0
 def test_mapping_error(self):
     '''Passing a plain string where a mapping object is expected must fail.

     Loader.mapping is expected to raise LoaderError and
     MappingProperties.add_properties is expected to raise MappingError.
     '''
     # Resolve the bound methods first so only the calls are under test.
     load_mapping = Loader().mapping
     with self.assertRaises(LoaderError):
         load_mapping('MappingProperties', '')

     add_properties = MappingProperties('').add_properties
     with self.assertRaises(MappingError):
         add_properties('MappingProperties')
Esempio n. 17
0
    def create_criteria_mapping(cls, idx, idx_type, test_mode=False):
        ''' Build the mapping for a criteria index type and, unless testing, load it.
        @type  idx: string
        @param idx: name of the index
        @type  idx_type: string
        @param idx_type: name of the idx type, each criteria is an index type
        @type  test_mode: boolean
        @param test_mode: when True the mapping is built but Loader.mapping
                          is not called
        @return: the MappingProperties object that was built
        '''
        logger.warning('Idx ' + idx)
        logger.warning('Idx_type ' + idx_type)

        # Top-level fields of the criteria mapping.
        criteria_props = MappingProperties(idx_type)
        criteria_props.add_property("score", "integer")
        for tag_field in ("disease_tags", "qid"):
            criteria_props.add_property(tag_field, "string", index="not_analyzed")

        # One group per available disease code, each with fid/fname and an
        # "fnotes" sub-group of link fields.
        (main_codes, other_codes) = CriteriaManager().get_available_diseases()
        for disease in main_codes + other_codes:
            disease_props = MappingProperties(disease)
            for feature_field in ("fid", "fname"):
                disease_props.add_property(feature_field, "string", index="not_analyzed")

            note_props = MappingProperties('fnotes')
            for link_field in ('linkid', 'linkname', 'linkdata', 'linkvalue'):
                note_props.add_property(link_field, "string", index="not_analyzed")

            disease_props.add_properties(note_props)
            criteria_props.add_properties(disease_props)

        # create index and add mapping
        loader = Loader()

        # add meta info: the description configured for this index type
        type_config = CriteriaManager.get_criteria_config()[idx_type]
        meta = {"desc": type_config['desc']}
        if not test_mode:
            loader.mapping(criteria_props, idx_type, meta=meta, analyzer=Loader.KEYWORD_ANALYZER,
                           indexName=idx, shards=5)
        return criteria_props
Esempio n. 18
0
 def _get_nested_prop(cls, nested_name, prop_name):
     '''Return a MappingProperties named nested_name holding one string field.

     The single field is called prop_name and is added with
     index="not_analyzed".
     '''
     nested_props = MappingProperties(nested_name)
     nested_props.add_property(prop_name, "string", index="not_analyzed")
     return nested_props