def __init__(self, target, source=None, only_swissprot=True, mapper=None):
    """
    Set up translation between homologous UniProt IDs of 2 organisms
    based on NCBI HomoloGene data. Uses RefSeq and Entrez IDs for
    translation.

    The proteome of the target organism is loaded right away; the
    HomoloGene based dictionary is built only if a source organism
    is given.

    :param int target: NCBI Taxonomy ID of the organism to be
        translated to.
    :param int source: NCBI Taxonomy ID of the default organism to be
        translated from.
    :param bool only_swissprot: Whether only SwissProt or Trembl IDs
        should be used.
    :param pypath.mapping.Mapper mapper: A Mapper object. If None, a
        new one is created.
    """

    # NOTE(review): this attribute name was unreadable in the original
    # (`h**o`, which is not valid Python); `homo` is assumed -- confirm
    # against the other methods of the class.
    self.homo = {}
    self.only_swissprot = only_swissprot
    self.target = target
    self.source = source
    self.set_default_source(source)
    self.mapper = mapping.Mapper() if mapper is None else mapper

    Proteomes.__init__(self)
    self.load_proteome(self.target, self.only_swissprot)

    if source is not None:
        self.homologene_uniprot_dict(source)
def get_reg_rels(pa, prot_list):
    """
    Collect signed regulatory relations among a list of proteins.

    For every protein in ``prot_list`` its stimulating and inhibiting
    regulators are queried from ``pa``. A relation is kept only if the
    regulator is a different member of ``prot_list`` and the edge has
    at least 2 entries in its ``"sources"``. Regulators reported both
    as stimulating and as inhibiting are resolved using the
    ``majority_sign()`` of the edge's ``"dirs"``; if this is missing
    or is both positive and negative, the relation is dropped.

    :param pa: Network object providing ``gs_stimulated_by``,
        ``gs_inhibited_by`` and ``get_edge``.
    :param prot_list: Iterable of protein names (presumably gene
        symbols, as the sign keys are mapped from UniProt to gene
        symbol before being compared -- verify against the caller).
    :return: Set of ``(regulator, target, effect)`` tuples, where
        ``effect`` is either ``"activates"`` or ``"inhibits"``.
    """

    prots = list(prot_list)
    # constant time membership tests instead of scanning the list
    members = set(prots)
    reg_rels = set()
    m = mapping.Mapper(9606)

    def well_supported(reg, prot):
        # no self loops, only regulators within the list, and
        # at least 2 sources for the edge
        if reg == prot or reg not in members:
            return False
        return len(pa.get_edge(reg, prot)["sources"]) >= 2

    for prot in prots:
        pos_regs = set(pa.gs_stimulated_by(prot).gs())
        neg_regs = set(pa.gs_inhibited_by(prot).gs())
        ambig_regs = pos_regs & neg_regs
        pos_regs -= ambig_regs
        neg_regs -= ambig_regs

        for reg in pos_regs:
            if well_supported(reg, prot):
                reg_rels.add((reg, prot, "activates"))

        for reg in neg_regs:
            if well_supported(reg, prot):
                reg_rels.add((reg, prot, "inhibits"))

        for reg in ambig_regs:
            if not well_supported(reg, prot):
                continue

            signs_dict = pa.get_edge(reg, prot)["dirs"].majority_sign()

            # Must be reset for each pair: otherwise a pair without a
            # matching entry would either fail on an unbound name or
            # silently reuse the sign of the previous pair.
            sign = None

            for sign_pair in signs_dict:
                sign_reg = m.map_name(sign_pair[0], "uniprot", "genesymbol")
                sign_sub = m.map_name(sign_pair[1], "uniprot", "genesymbol")

                # IDs which can not be mapped can not match either
                if not sign_reg or not sign_sub:
                    continue

                if sign_reg[0] == reg and sign_sub[0] == prot:
                    sign = signs_dict[sign_pair]
                    break

            if sign is None:
                print("crap", reg, prot)
                continue

            if sign[0] and sign[1]:
                # ambiguous sign
                continue

            if sign[0]:
                reg_rels.add((reg, prot, "activates"))
            elif sign[1]:
                reg_rels.add((reg, prot, "inhibits"))

    return reg_rels
def __init__(self,
             source,
             reactants=None,
             products=None,
             references=None,
             id_type='uniprot',
             names_reactants=None,
             names_products=None,
             ptms_reactants=None,
             ncbi_tax_id=9606,
             ptms_products=None,
             mapper=None,
             seq=None):
    '''
    Registers the reactants and products of one reaction.

    source : str
        Source database, e.g. `Reactome`.
    reactants : list
        List of reactants. List of string IDs or list of lists,
        if reactants are complexes.
    products : list
        List of products. List of string IDs or list of lists,
        if products are complexes.
    references : list
        References of the reaction; stored as given, or a new
        empty list if not provided.
    id_type : str
        Type of reactant and product IDs. Default is UniProt ID.
    names_reactants : list
        Names of the reactants, in the same order as `reactants`.
        If not a list, names are obtained from `species_name`.
    names_products : list
        Names of the products, in the same order as `products`.
        If not a list, names are obtained from `species_name`.
    ncbi_tax_id : int
        NCBI Taxonomy ID, stored on the instance.
    mapper : pypath.mapping.Mapper
        If no Mapper instance given, a new one will be
        initialized.

    `ptms_reactants`, `ptms_products` and `seq` are not used in
    this part of the constructor.
    '''

    self.mapper = mapper if mapper is not None else mapping.Mapper()
    self.source = source
    # a fresh list for each instance instead of a shared default
    self.references = [] if references is None else references
    self.ncbi_tax_id = ncbi_tax_id
    self.reactants = {}
    self.products = {}

    # Every species becomes a list of IDs. These must be real lists:
    # they are iterated more than once below, and a generator would
    # be already exhausted by the time it reaches `add_species`.
    reactants = [
        ids if isinstance(ids, list) else [ids]
        for ids in (reactants or ())
    ]
    products = [
        ids if isinstance(ids, list) else [ids]
        for ids in (products or ())
    ]

    # NOTE(review): the default names used to be generated by
    # iterating `self.reactants` / `self.products`, which are still
    # empty at this point, hence the names were always missing.
    # Iterating the normalized inputs is the assumed intention.
    if not isinstance(names_reactants, list):
        names_reactants = [self.species_name(ids) for ids in reactants]

    if not isinstance(names_products, list):
        names_products = [self.species_name(ids) for ids in products]

    # NOTE(review): `species_id` is called as a module level name,
    # just like in the original -- confirm it is not meant to be a
    # method.
    names_reactants = dict(
        zip((species_id(ids) for ids in reactants), names_reactants))
    names_products = dict(
        zip((species_id(ids) for ids in products), names_products))

    self.add_species('reactants', reactants, names_reactants, source)
    self.add_species('products', products, names_products, source)

    if source == 'Reactome':

        # `items` works both in Python 2 and 3
        for i, name in names_reactants.items():
            ptms = self.reactome_ptms(i, name)
def __init__(self, user=None, mapper=None):
    """
    Prepares the object for use with an MSigDB user name.

    The user name is taken from the ``user`` argument, or, if that is
    None, from the ``MSIGDB_USER`` global variable of this module.
    With a user name ``login()`` is called, the containers are
    created and ``list_collections()`` is called. Without one only a
    message is written to the standard output and nothing else is
    initialized.

    :param user: MSigDB user name.
    :param mapper: A Mapper object; a new ``mapping.Mapper`` is
        created if None.
    """

    if user is None:
        user = globals().get('MSIGDB_USER')

    self.user = user

    if self.user is None:
        # nothing can be done without a user name
        sys.stdout.write('\t:: Please provide an MSigDB username by \n'
                         '``pypath.gsea.GSEA(user = \'\', ...)``, or by \n'
                         'setting ``MSIGDB_USER`` global variable.\n\n')
        sys.stdout.flush()
        return

    self.login()

    self.mapper = mapping.Mapper() if mapper is None else mapper

    # empty containers, created before listing the collections
    self.info = {}
    self.groups = {}
    self.sets = {}
    self.collections = {}
    self.list_collections()

    self.ids = {'entrez': 'entrez', 'symbol': 'genesymbol'}
    self.target_id = 'uniprot'
def __init__(self, chembl_mysql=(None, 'chembl_ebi'), ncbi_tax_id=9606, mapping_mysql=None, mapper=None): self.mysql = mysql.MysqlRunner(chembl_mysql) self.ncbi_tax_id = ncbi_tax_id if mapper.__class__.__name__ != 'Mapper': self.mapper = mapping.Mapper(ncbi_tax_id, mapping_mysql) # self.mapper.load_mappings(maps=data_formats.mapListUniprot) else: self.mapper = mapper self.chembl_uniprot_table() self.result = None # constant elements: self.mandatory_fields = set([ 'compound_chembl', 'target_uniprot', 'tax_id', 'target_type', 'potential_duplicate' ]) self.extra_fields = [ 'compound_names', 'action_type', 'target_domains', 'predicted_binding_domains', 'activities', 'pchembl' ] self.set_group_concat_len = '''SET group_concat_max_len=18446744073709551615;''' self.group_concat_len_increased = False self.pbd_join = ''' /* predicted binding domains */ LEFT JOIN predicted_binding_domains AS pbd ON ac.activity_id = pbd.activity_id LEFT JOIN site_components AS sc ON pbd.site_id = sc.site_id LEFT JOIN domains AS pdm ON sc.domain_id = pdm.domain_id ''' self.pbd_select = ''', GROUP_CONCAT( DISTINCT(pdm.source_domain_id) SEPARATOR ";") AS predicted_binding_domains''' self.dom_join = ''' /* domains of the target */ LEFT JOIN component_domains AS cd ON tc.component_id = cd.component_id LEFT JOIN domains AS tdm ON cd.domain_id = tdm.domain_id ''' self.dom_select = ''', GROUP_CONCAT( DISTINCT(tdm.source_domain_id) SEPARATOR ";") AS target_domains''' self.atype_join = ''' LEFT JOIN drug_mechanism AS dm ON (dm.molregno = md.molregno AND dm.tid = td.tid)''' self.atype_select = ''', GROUP_CONCAT(DISTINCT(dm.action_type) SEPARATOR ';') AS action_type''' self.cprop_select = ',\n CAST(cp.%s AS CHAR) AS %s' self.cprop_join = ''' /* various properties of the compounds */ LEFT JOIN compound_properties AS cp ON md.molregno = cp.molregno''' self.act_join = ''' /* this is for activity values, if available */ LEFT JOIN assays AS ay ON dm.tid = ay.tid LEFT JOIN activities AS ac ON (ac.assay_id = 
ay.assay_id AND ac.molregno = dm.molregno)''' self.act_join = ''' /* this is for activity values, if available */ LEFT JOIN ( SELECT ay.tid, ac.standard_type, ac.standard_value, ac.standard_units, ac.pchembl_value, ac.molregno, ac.activity_id FROM assays AS ay INNER JOIN activities AS ac ON ac.assay_id = ay.assay_id ) AS ac ON (ac.molregno = dm.molregno AND ac.tid = dm.tid)''' self.act_select = ''', GROUP_CONCAT(DISTINCT(CONCAT( ac.standard_type,'=',ac.standard_value,'=',ac.standard_units )) SEPARATOR ';') AS activities''' self.pchembl_select = ''', GROUP_CONCAT(DISTINCT(ac.pchembl_value) SEPARATOR ';') AS pchembl''' self.group_concat_len = '''SET group_concat_max_len=18446744073709551615;''' self.comp_syn = '''SELECT ms.synonyms AS syn, md.chembl_id FROM molecule_synonyms AS ms LEFT JOIN molecule_dictionary AS md ON ms.molregno = md.molregno WHERE ms.synonyms IN (%s) GROUP BY ms.synonyms, md.chembl_id ORDER BY NULL;''' self.comp_rec = '''SELECT
def init_mapper(self):
    """
    Keeps the current mapper if it is truthy, otherwise replaces it
    with a new ``mapping.Mapper``.
    """

    current = self.mapper
    self.mapper = current if current else mapping.Mapper()
def init_mapper(self):
    """
    Creates a new ``mapping.Mapper`` if the instance has none yet,
    i.e. its ``mapper`` attribute is None.
    """

    if self.mapper is not None:
        # already has a mapper, nothing to do
        return

    self.mapper = mapping.Mapper()
def set_mapper(self):
    """
    Creates a new ``mapping.Mapper`` for the ``ncbi_tax_id`` of the
    instance if its ``mapper`` attribute is None.
    """

    if self.mapper is not None:
        # already has a mapper, nothing to do
        return

    self.mapper = mapping.Mapper(ncbi_tax_id=self.ncbi_tax_id)
# The cogent and sqlalchemy modules need to be installed:
#     pip2 install cogent
#     pip2 install sqlalchemy

from cogent.db.ensembl import HostAccount, Species, Genome

from pypath import mapping

Release = 78
account = HostAccount('ensembldb.ensembl.org', 'anonymous', '')
human = Genome(Species='human', Release=Release, account=account)

# UniProt, seq offset, residue, isoform
positions = [('P00533', 40, 'Q', 1), ('P60520', 30, 'P', 1)]

m = mapping.Mapper()
m.load_uniprot_mappings(['ensg'], bi=True)

# One record for each gene matching each ENSG ID of each position:
# (ENSG, gene location, exons of the canonical transcript) followed
# by the fields of the original position.
positions_ens = []

for p in positions:

    for ensg in m.map_name(p[0], 'uniprot', 'ensg'):

        positions_ens.extend(
            (ensg, gene.Location, gene.CanonicalTranscript.Exons) + tuple(p)
            for gene in human.getGenesMatching(StableId=ensg)
        )

# Another attempt, with biopython --
# (it works, if you can map all proteins to RefSeq Gene IDs)