Example #1
0
    def __init__(self, drug_decode):
        """Load the drug tables and create the chemistry web-service clients.

        :param drug_decode: passed unchanged to ``DrugDecode``; two separate
            ``DrugDecode`` objects are built from it (``self.dd`` and
            ``self.dd_filled``).
        """
        print("ChemSpiderSearch is still in progress, please do not use")
        self.dd = DrugDecode(drug_decode)
        self.dd_filled = DrugDecode(drug_decode)

        from bioservices.chemspider import ChemSpider
        from bioservices import ChEMBL
        from bioservices import UniChem

        try:
            print('Loading PubChem')
            from bioservices.pubchem import PubChem
            # NOTE: the attribute is spelt ``puchem`` (sic); the name is kept
            # because code outside this method may depend on it.
            self.puchem = PubChem()
        except Exception:
            # Pubchem was introduced only in dec 2015, so the service is
            # treated as optional.  ``Exception`` (rather than a bare except)
            # lets KeyboardInterrupt / SystemExit propagate.
            pass

        print('Loading ChEMBL service')
        self.chembl = ChEMBL(cache=True)

        print('Loading ChemSpider service')
        self.chemspider = ChemSpider(cache=True)

        print('Loading UniChem service')
        # in unichem db number is 22 and chembl is 1
        self.unichem = UniChem()

        print('Settings some data aliases')
        # Short aliases for the two ChemSpider calls.
        self._cs_find = self.chemspider.find
        self._cs_get = self.chemspider.GetExtendedCompoundInfo

        # Sorted copies of the drug identifiers (index) and drug names.
        self.drug_ids = sorted(self.dd.df.index.values)
        self.drug_names = sorted(self.dd.df.DRUG_NAME.values)
Example #2
0
def get_compound_id(smiles):
    """Return the KEGG compound ID whose structure matches ``smiles``.

    The input is canonicalised with Indigo, candidate IDs are fetched from
    the ``rest.genome.jp/subcomp`` service, each candidate is mapped to ChEBI
    through UniChem to obtain its SMILES, and the first candidate whose
    canonical SMILES equals the canonical input is returned.

    :param smiles: SMILES string of the compound of interest.
    :return: the matching entry of the candidate ID list.
    :raises RuntimeError: if the HTTP request fails or no candidate matches.
    """
    indigo = Indigo()
    # convert smiles to standard format
    mol = indigo.loadMolecule(smiles)
    mol.aromatize()
    moi_smiles = mol.canonicalSmiles()

    # Get list of possible kegg IDs
    # NOTE(review): the raw input is interpolated without URL-escaping;
    # confirm whether the service needs characters such as '#' escaped.
    url = "http://rest.genome.jp/subcomp/?smiles=%s&cutoff=1.0" % smiles
    http_client = HTTPClient()
    try:
        response = http_client.fetch(url).body
    except HTTPError as e:
        raise RuntimeError("Error: %s" % e)
    finally:
        # Release the client on both the success and the failure path.
        http_client.close()

    # The last element after splitting is discarded (the code assumes the
    # body ends with a newline); the ID is the first tab-separated field.
    lines = response.split("\n")
    lines.pop()
    subcomp_results = [line.split('\t')[0] for line in lines]

    # get smiles for all compound IDs found
    uni = UniChem()
    mapping = uni.get_mapping("kegg_ligand", "chebi")
    ch = ChEBI()
    all_smiles = [ch.getCompleteEntity(mapping[x]).smiles
                  for x in subcomp_results]

    # convert smiles to a standard format
    for pos, candidate in enumerate(all_smiles):
        m = indigo.loadMolecule(candidate)
        m.aromatize()
        all_smiles[pos] = m.canonicalSmiles()

    # check if smiles matches given and, if so, use that compound ID
    # if not, errors out
    try:
        index = all_smiles.index(moi_smiles)
    except ValueError:
        raise RuntimeError("SMILES unmatchable to: %s" % str(all_smiles))
    return subcomp_results[index]
Example #3
0
def test_get_source_id():
    """Check ``UniChem.get_source_id`` for valid and invalid identifiers."""
    uni = UniChem()
    assert uni.get_source_id("chembl") == 1
    assert uni.get_source_id("1") == 1
    assert uni.get_source_id(1) == 1

    # Invalid identifiers must raise.  The failure is signalled outside the
    # ``try`` block so that it cannot be swallowed by the ``except`` clause
    # (an ``assert False`` inside the ``try`` would be caught and hidden).
    for bad_source in ("wrong", "20000"):
        try:
            uni.get_source_id(bad_source)
        except Exception:
            continue
        raise AssertionError(
            "get_source_id(%r) should have raised" % (bad_source,))
Example #4
0
    def __init__(self, verbosity="INFO"):
        """Initialise the parent class and create every service client.

        :param verbosity: forwarded to the parent constructor as ``level``.
        """
        super(Mapper, self).__init__(level=verbosity)

        # Local shortcut for the info-level logger used below.
        log = self.logging.info
        log("Initialising the services")

        log("... uniprots")
        self._uniprot_service = UniProt()

        log("... KEGG")
        self._kegg_service = KeggParser(verbose=False)

        log("... HGNC")
        self._hgnc_service = HGNC()

        log("... UniChem")
        self._unichem_service = UniChem()

        log("...BioDBNet")
        self._biodbnet = BioDBNet()
Example #5
0
    def convert_kegg_nodes(self, network):
        """
        Maps KEGG compound nodes of a network to HMDB ids and chemical names

        Only nodes whose name starts with ``cpd:`` are considered.

        Parameters
        ----------
        network : nx.DiGraph

        Returns
        -------
        tuple
            Three dicts keyed by the original ``cpd:`` node name:
            node -> HMDB id(s), node -> name without the ``cpd:`` prefix,
            node -> chemical name(s).

        """
        prefix = 'cpd:'

        still_unknown = []
        hits = [i for i in set(network.nodes) if i.startswith(prefix)]
        net_kegg_names = dict()
        net_chem_names = dict()
        net_cpd_to_hmdb = dict()
        for i in hits:

            # Slice the prefix off: str.lstrip('cpd:') strips any leading
            # run of the characters c/p/d/: and could eat part of the id.
            name_stripped = i[len(prefix):]
            net_kegg_names[i] = name_stripped

            if name_stripped in self.kegg_to_hmdb:
                mapping = self.kegg_to_hmdb[name_stripped]
                if isinstance(mapping, (list, set, SortedSet)):
                    names = '|'.join(set(mapping))
                    chem_names = set()
                    for name in mapping:
                        try:
                            chem_names.update(self.hmdb_to_chem_name[name])
                        except (KeyError, TypeError):
                            # No usable chemical name for this id: skip it.
                            continue
                    net_cpd_to_hmdb[i] = names
                    net_chem_names[i] = order_merge(chem_names)

                elif isinstance(mapping, basestring):

                    chem_n = self.hmdb_to_chem_name[mapping]
                    net_cpd_to_hmdb[i] = mapping
                    # NOTE(review): the other branches pass this value to
                    # order_merge; joining an encoded string here looks
                    # inconsistent -- confirm the intended behaviour.
                    net_chem_names[i] = '|'.join(chem_n.encode('ascii',
                                                               'ignore'))
                else:
                    print('Returned something else...', mapping)

            elif i in compound_manual:
                loc = compound_manual[i]
                net_cpd_to_hmdb[i] = loc
                if loc in self.hmdb_to_chem_name:
                    net_chem_names[i] = order_merge(
                        self.hmdb_to_chem_name[loc])
            else:
                still_unknown.append(i)
        if still_unknown:
            # The UniChem client is created only when a fallback lookup is
            # actually required.
            kegg_hmdb = UniChem().get_mapping("kegg_ligand", "hmdb")
            for i in still_unknown:
                name_stripped = net_kegg_names[i]
                if name_stripped in kegg_hmdb:
                    net_cpd_to_hmdb[i] = kegg_hmdb[name_stripped]
        return net_cpd_to_hmdb, net_kegg_names, net_chem_names
Example #6
0
"""
from argparse import ArgumentParser
import mysql.connector as mysqlc
from bioservices import UniChem

#Define command line options and defaults
parser = ArgumentParser()
parser.add_argument("-c",
                    "--mysqlconf",
                    dest="mysql_conf",
                    default="/home/kkmattil/Documents/DDCB/mysql_write.conf",
                    help="MySQL_conf",
                    metavar="MYSQL_CONF")
args = parser.parse_args()

uniC = UniChem()

#Open mysql connection (connection settings come from the option file)
cnx = mysqlc.connect(option_files=args.mysql_conf)

cursor = cnx.cursor()

# Start from an empty table.  IF EXISTS keeps the very first run (when the
# table has never been created) from failing on the DROP statement.
SQL_drop_table = ('DROP TABLE IF EXISTS unichem_links;')
cursor.execute(SQL_drop_table)
cnx.commit()

# The same cursor is reused; no second cursor is opened over an unclosed one.
SQL_create_table_if_needed = (
    'CREATE TABLE IF NOT EXISTS unichem_links(comp_num INT,  source_id INT, source VARCHAR(200), id_in_db  VARCHAR(200)) ENGINE=INNODB;'
)
cursor.execute(SQL_create_table_if_needed)
Example #7
0
def test_get_auxiliary_mapping():
    """Smoke test: call ``get_auxiliary_mappings`` with source id 1."""
    # Nothing is asserted: the call itself is the whole test
    # (this does nothing -- behaviour of the function).
    client = UniChem()
    client.get_auxiliary_mappings(1)
Example #8
0
def test_get_src_compoundid_url():
    """Smoke test: ``get_src_compound_id_url`` from chembl to drugbank."""
    client = UniChem()
    compound, source, target = "CHEMBL12", "chembl", "drugbank"
    client.get_src_compound_id_url(compound, source, target)
Example #9
0
def test_unichem_src_compound_from_inchikey():
    """Smoke test: both InChIKey lookups are called with the same key."""
    inchikey = "AAOVKJBEBIDNHE-UHFFFAOYSA-N"
    client = UniChem()
    client.get_src_compound_ids_from_inchikey(inchikey)
    client.get_src_compound_ids_all_from_inchikey(inchikey)
Example #10
0
def test_get_src_compound_ids_all_from_obsolete():
    """Smoke test: obsolete-id lookup without and with a target source."""
    client = UniChem()
    # Results are not inspected; the calls only have to complete.
    client.get_src_compound_ids_all_from_obsolete("DB07699", 2)
    client.get_src_compound_ids_all_from_obsolete("DB07699", 2, "chembl")
Example #11
0
def test_unichem_src_compound():
    """Smoke test: map CHEMBL12 to chebi, then to drugbank (all ids)."""
    client = UniChem()
    client.get_src_compound_ids_from_src_compound_id(
        "CHEMBL12", "chembl", "chebi")
    client.get_src_compound_ids_all_from_src_compound_id(
        "CHEMBL12", "chembl", "drugbank")
Example #12
0
def test_structure():
    """Smoke test: ``get_structure`` and ``get_structure_all`` for CHEMBL12."""
    client = UniChem()
    query = ("CHEMBL12", "chembl")
    client.get_structure(*query)
    client.get_structure_all(*query)
Example #13
0
def test_src_ids():
    """Smoke test: list all source ids, then fetch the chembl source info."""
    client = UniChem()
    client.get_all_src_ids()
    client.get_source_information("chembl")
Example #14
0
def test_mapping():
    """The kegg_ligand -> chembl mapping must contain at least one entry."""
    kegg_to_chembl = UniChem().get_mapping("kegg_ligand", "chembl")
    assert len(kegg_to_chembl) > 0
Example #15
0
def unichem():
    """Return a ``UniChem`` client created with ``verbose=False``."""
    return UniChem(verbose=False)
Example #16
0
def test_get_verbose_src_compound_ids_fron_inchikey():
    """Smoke test: verbose InChIKey lookup for one fixed key."""
    inchikey = "GZUITABIAKMVPG-UHFFFAOYSA-N"
    client = UniChem()
    client.get_verbose_src_compound_ids_from_inchikey(inchikey)
Example #17
0
    def convert_kegg_nodes(self, network):
        """
        Maps KEGG compound nodes of a network to HMDB ids and chemical names

        Only nodes whose name starts with ``cpd:`` are considered.

        Parameters
        ----------
        network : nx.DiGraph

        Returns
        -------
        tuple
            Three dicts keyed by the original ``cpd:`` node name:
            node -> HMDB id(s), node -> name without the ``cpd:`` prefix,
            node -> chemical name(s).

        """
        prefix = 'cpd:'

        still_unknown = []
        hits = [i for i in set(network.nodes) if i.startswith(prefix)]
        net_kegg_names = dict()
        net_chem_names = dict()
        net_cpd_to_hmdb = dict()
        for i in hits:

            # Slice the prefix off: str.lstrip('cpd:') strips any leading
            # run of the characters c/p/d/: and could eat part of the id.
            name_stripped = i[len(prefix):]
            net_kegg_names[i] = name_stripped

            if name_stripped in self.kegg_to_hmdb:
                mapping = self.kegg_to_hmdb[name_stripped]
                if isinstance(mapping, (list, set, SortedSet)):
                    names = '|'.join(set(mapping))
                    chem_names = set()
                    for name in mapping:
                        try:
                            chem_names.update(self.hmdb_to_chem_name[name])
                        except (KeyError, TypeError):
                            # No usable chemical name for this id: skip it.
                            continue
                    net_cpd_to_hmdb[i] = names
                    net_chem_names[i] = order_merge(chem_names)

                elif isinstance(mapping, basestring):

                    chem_n = self.hmdb_to_chem_name[mapping]
                    net_cpd_to_hmdb[i] = mapping
                    # NOTE(review): the other branches pass this value to
                    # order_merge; joining an encoded string here looks
                    # inconsistent -- confirm the intended behaviour.
                    net_chem_names[i] = '|'.join(
                        chem_n.encode('ascii', 'ignore'))
                else:
                    print('Returned something else...', mapping)

            elif i in compound_manual:
                loc = compound_manual[i]
                net_cpd_to_hmdb[i] = loc
                if loc in self.hmdb_to_chem_name:
                    net_chem_names[i] = order_merge(
                        self.hmdb_to_chem_name[loc])
            else:
                still_unknown.append(i)
        if still_unknown:
            # The UniChem client is created only when a fallback lookup is
            # actually required.
            kegg_hmdb = UniChem().get_mapping("kegg_ligand", "hmdb")
            for i in still_unknown:
                name_stripped = net_kegg_names[i]
                if name_stripped in kegg_hmdb:
                    net_cpd_to_hmdb[i] = kegg_hmdb[name_stripped]
        return net_cpd_to_hmdb, net_kegg_names, net_chem_names