Ejemplo n.º 1
0
def resolve(mystery):
    """Resolve a free-form compound identifier to ChEMBL ID(s).

    The lookups are tried in order and the first one that yields a result
    wins:

    1. a string starting with ``chembl`` (any case) is returned upper-cased
       without querying anything;
    2. a case-insensitive exact match on the preferred name;
    3. a search whose hits are filtered by ``inspect_synonyms``;
    4. a ``molecule.get`` by InChI key, where the key is either ``mystery``
       itself (when it matches ``inchi_key_regex``) or is derived through
       ``utils`` from a SMILES or ``InChI=`` string.

    Args:
        mystery: The identifier to resolve, as a string.

    Returns:
        A ChEMBL ID, several IDs joined with ``', '``, or the string
        ``'NOT FOUND'`` when nothing matched.
    """
    # ``str`` has no ``istartswith``; lower-case first to get a
    # case-insensitive prefix test.
    if mystery.lower().startswith('chembl'):
        return mystery.upper()

    molecule = new_client.molecule

    res = molecule.filter(pref_name__iexact=mystery)
    if len(res):
        return ', '.join(x['molecule_chembl_id'] for x in res)

    res = [x for x in molecule.search(mystery) if inspect_synonyms(x, mystery)]
    if res:
        return ', '.join(x['molecule_chembl_id'] for x in res)

    # Work out which InChI key (if any) to look up. Starting from None keeps
    # the final check safe when the input is none of the recognised formats.
    inchi_key = None
    if inchi_key_regex.match(mystery.upper()):
        inchi_key = mystery.upper()
    elif smilesRegex.match(mystery):
        inchi_key = utils.inchi2inchiKey(
            utils.ctab2inchi(utils.smiles2ctab(mystery)))
    elif mystery.upper().startswith('INCHI='):
        inchi_key = utils.inchi2inchiKey(mystery)

    if inchi_key:
        # Query with the derived key, not with the raw input string.
        res = molecule.get(inchi_key)
        if res:
            return res['molecule_chembl_id']
    return 'NOT FOUND'
Ejemplo n.º 2
0
def resolve(mystery):
    """Map a compound name, ChEMBL ID, InChI key, SMILES or InChI to ChEMBL ID(s).

    Strategies are attempted in this order, stopping at the first success:
    ChEMBL-ID prefix check (no query is made), exact preferred-name match
    (case-insensitive), search filtered through ``inspect_synonyms``, and
    finally a ``molecule.get`` by InChI key. The key is taken from the input
    directly when it matches ``inchi_key_regex``; otherwise it is computed
    with ``utils`` from a SMILES or an ``InChI=`` string.

    Args:
        mystery: The identifier to resolve, as a string.

    Returns:
        A single ChEMBL ID, multiple IDs joined by ``', '``, or
        ``'NOT FOUND'`` if no strategy produced a hit.
    """
    not_found = 'NOT FOUND'

    # ``str.istartswith`` does not exist; normalise case explicitly.
    if mystery.lower().startswith('chembl'):
        return mystery.upper()

    molecule = new_client.molecule

    res = molecule.filter(pref_name__iexact=mystery)
    if len(res):
        return ', '.join(x['molecule_chembl_id'] for x in res)

    res = [x for x in molecule.search(mystery) if inspect_synonyms(x, mystery)]
    if res:
        return ', '.join(x['molecule_chembl_id'] for x in res)

    if inchi_key_regex.match(mystery.upper()):
        res = molecule.get(mystery.upper())
        return res['molecule_chembl_id'] if res else not_found

    # Default to None so inputs that are neither SMILES nor InChI fall
    # through to 'NOT FOUND' instead of hitting an unbound local.
    inchi_key = None
    if smilesRegex.match(mystery):
        inchi_key = utils.inchi2inchiKey(
            utils.ctab2inchi(utils.smiles2ctab(mystery)))
    elif mystery.upper().startswith('INCHI='):
        inchi_key = utils.inchi2inchiKey(mystery)

    if not inchi_key:
        return not_found

    # Look the molecule up by the computed key rather than the raw input.
    res = molecule.get(inchi_key)
    return res['molecule_chembl_id'] if res else not_found
Ejemplo n.º 3
0
def resolve(mystery, single_result=False):
    """Look up molecule records for a free-form compound identifier.

    The molecule client is switched to JSON format, then these lookups are
    tried in order until one produces a result: ChEMBL-ID prefix, exact
    preferred name (case-insensitive), search filtered by
    ``inspect_synonyms``, InChI key, InChI key derived from SMILES or
    ``InChI=`` input, and finally ``ElasticClient.search_molecule``.

    Args:
        mystery: The identifier to resolve, as a string.
        single_result: When true, multi-hit lookups are cut down to a
            one-element list holding the first hit.

    Returns:
        A list (or the client's filter result) of molecule records.
        Nothing is returned explicitly when every lookup comes back empty.
    """
    client = new_client.molecule
    client.set_format('json')

    def narrowed(hits):
        # Honour ``single_result`` by keeping only the first hit.
        if single_result:
            return [hits[0]]
        return hits

    if mystery.lower().startswith('chembl'):
        return [client.get(mystery.upper())]

    by_name = client.filter(pref_name__iexact=mystery)
    if by_name:
        return narrowed(by_name)

    by_synonym = []
    for candidate in client.search(mystery.lower()):
        if inspect_synonyms(candidate, mystery):
            by_synonym.append(candidate)
    if by_synonym:
        return narrowed(by_synonym)

    if inchi_key_regex.match(mystery.upper()):
        return [client.get(mystery.upper())]

    if smiles_regex.match(mystery):
        ctab = utils.smiles2ctab(mystery)
        inchi_key = utils.inchi2inchiKey(utils.ctab2inchi(ctab))
    elif mystery.upper().startswith('INCHI='):
        inchi_key = utils.inchi2inchiKey(mystery)
    else:
        inchi_key = None
    if inchi_key:
        return [client.get(inchi_key.upper())]

    # Last resort: ask the Elastic client for IDs and fetch those records.
    chembl_ids = ElasticClient().search_molecule(mystery)
    if chembl_ids:
        return narrowed(client.filter(molecule_chembl_id__in=chembl_ids))
    def test_compute_maximal_common_substructure(self):
        """Check the pattern string ``utils.mcs`` returns for three SMILES inputs."""
        inputs = (
            "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C",
            "CC(C)CCCCCC(=O)NCC1=CC(=C(C=C1)O)OC",
            "c1(C=O)cc(OC)c(O)cc1",
        )
        # Each SMILES is converted on its own; the pieces are concatenated
        # into a single input for ``utils.mcs``.
        combined = ''.join(utils.smiles2ctab(entry) for entry in inputs)
        expected = '[#6]1(-[#6]):[#6]:[#6](-[#8]-[#6]):[#6](:[#6]:[#6]:1)-[#8]'
        self.assertEqual(utils.mcs(combined), expected)
Ejemplo n.º 5
0
def resolve(mystery, single_result=False):
    """Find molecule records matching a name, ChEMBL ID, InChI key, SMILES or InChI.

    After setting the molecule client to JSON format, the function tries,
    in order: ChEMBL-ID prefix lookup, case-insensitive preferred-name
    match, search filtered by ``inspect_synonyms``, direct InChI-key lookup,
    lookup by an InChI key computed from SMILES or ``InChI=`` input, and an
    ``ElasticClient`` search. The first lookup with a result decides the
    return value.

    Args:
        mystery: The identifier to resolve, as a string.
        single_result: If true, only the first record of a multi-hit lookup
            is returned (wrapped in a list).

    Returns:
        Molecule records as a list or client filter result. Falls off the
        end without an explicit return when nothing is found.
    """
    api = new_client.molecule
    api.set_format('json')

    if mystery.lower().startswith('chembl'):
        return [api.get(mystery.upper())]

    name_hits = api.filter(pref_name__iexact=mystery)
    if name_hits:
        return [name_hits[0]] if single_result else name_hits

    synonym_hits = list(filter(
        lambda hit: inspect_synonyms(hit, mystery),
        api.search(mystery.lower()),
    ))
    if synonym_hits:
        return [synonym_hits[0]] if single_result else synonym_hits

    if inchi_key_regex.match(mystery.upper()):
        return [api.get(mystery.upper())]

    derived_key = None
    if smiles_regex.match(mystery):
        as_inchi = utils.ctab2inchi(utils.smiles2ctab(mystery))
        derived_key = utils.inchi2inchiKey(as_inchi)
    elif mystery.upper().startswith('INCHI='):
        derived_key = utils.inchi2inchiKey(mystery)
    if derived_key:
        return [api.get(derived_key.upper())]

    # Fallback: IDs come from the Elastic client, records from the API.
    elastic = ElasticClient()
    found_ids = elastic.search_molecule(mystery)
    if found_ids:
        records = api.filter(molecule_chembl_id__in=found_ids)
        return [records[0]] if single_result else records
Ejemplo n.º 6
0
 def test_utils_json_images(self):
     """Compare the JSON produced from a SMILES string and from its ctab."""
     aspirin_smiles = 'O=C(Oc1ccccc1C(=O)O)C'
     from_smiles = json.loads(utils.smiles2json(aspirin_smiles))
     self.assertEqual(len(from_smiles), 33)
     # The first element must carry all three keys.
     head = from_smiles[0]
     self.assertTrue(all(key in head for key in ('path', 'stroke', 'type')))
     ctab = utils.smiles2ctab(aspirin_smiles)
     from_ctab = json.loads(utils.ctab2json(ctab))
     self.assertEqual(len(from_smiles), len(from_ctab))
Ejemplo n.º 7
0
 def test_utils_svg_images(self):
     """Check that SVG output is identical via SMILES and via ctab."""
     ring_smiles = 'c1ccccc1'
     xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
     direct = utils.smiles2svg(ring_smiles)
     self.assertTrue(2000 < len(direct))
     self.assertTrue(direct.startswith(xml_header))
     # Same molecule, rendered after an intermediate ctab conversion.
     via_ctab = utils.ctab2svg(utils.smiles2ctab(ring_smiles))
     self.assertEqual(direct, via_ctab)
Ejemplo n.º 8
0
 def test_utils_raster_images(self):
     """Check the raster output from SMILES and from ctab input."""
     def check_image(blob):
         # First four items must equal the '\x89PNG' text literal, and the
         # payload must be longer than 5000.
         self.assertEqual(blob[0:4], '\x89PNG')
         self.assertTrue(5000 < len(blob))

     aspirin_smiles = 'O=C(Oc1ccccc1C(=O)O)C'
     check_image(utils.smiles2image(aspirin_smiles))
     check_image(utils.ctab2image(utils.smiles2ctab(aspirin_smiles)))
Ejemplo n.º 9
0
 def test_utils_fingerprints(self):
     """Check the whitespace-separated fields of the ``utils.sdf2fps`` output."""
     ctab = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')
     fields = utils.sdf2fps(ctab).split()
     # Fields 0-2 are '#'-prefixed header entries.
     self.assertEqual(fields[0], '#FPS1')
     self.assertEqual(fields[1], '#num_bits=2048')
     self.assertTrue(fields[2].startswith('#software='))
     # Field 3 is expected to be 512 characters long; field 4 is a fixed
     # identifier string.
     self.assertEqual(len(fields[3]), 512)
     self.assertEqual(fields[4], 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N')
Ejemplo n.º 10
0
def get_calculatable_properties(smile_formula):
    """Return the first descriptor record computed for a SMILES string.

    The SMILES is converted with ``utils.smiles2ctab``, passed to
    ``utils.descriptors``, and the JSON reply is decoded.

    Args:
        smile_formula: SMILES string of the compound.

    Returns:
        Element 0 of the decoded ``utils.descriptors`` response
        (presumably a mapping with keys such as 'MolecularFormula',
        'MolWt' and 'TPSA'; verify against the service).
    """
    ctab = utils.smiles2ctab(smile_formula)
    payload = utils.descriptors(ctab)
    return json.loads(payload)[0]
Ejemplo n.º 11
0
 def test_utils_standardisation(self):
     """Exercise breakbonds, neutralise, rules, unsalt and standardise in turn."""
     def smiles_of(ctab):
         # The SMILES is the third whitespace-separated token of the
         # ``utils.ctab2smiles`` output.
         return utils.ctab2smiles(ctab).split()[2]

     # breakbonds
     source = utils.smiles2ctab("[Na]OC(=O)c1ccccc1")
     outcome = utils.breakbonds(source)
     self.assertEqual(smiles_of(outcome), '[Na+].O=C([O-])c1ccccc1')

     # neutralise
     source = utils.smiles2ctab("C(C(=O)[O-])(Cc1n[n-]nn1)(C[NH3+])(C[N+](=O)[O-])")
     outcome = utils.neutralise(source)
     self.assertEqual(smiles_of(outcome), 'NCC(Cc1nn[nH]n1)(C[N+](=O)[O-])C(=O)O')

     # rules
     source = utils.smiles2ctab("Oc1nccc2cc[nH]c(=N)c12")
     outcome = utils.rules(source)
     self.assertEqual(smiles_of(outcome), 'Nc1nccc2cc[nH]c(=O)c12')

     # unsalt
     source = utils.smiles2ctab("[Na+].OC(=O)Cc1ccc(CN)cc1.OS(=O)(=O)C(F)(F)F")
     outcome = utils.unsalt(source)
     self.assertEqual(smiles_of(outcome), 'NCc1ccc(CC(=O)O)cc1')

     # standardise
     source = utils.smiles2ctab("[Na]OC(=O)Cc1ccc(C[NH3+])cc1.c1nnn[n-]1.O")
     outcome = utils.standardise(source)
     self.assertEqual(smiles_of(outcome), 'NCc1ccc(CC(=O)O)cc1')
Ejemplo n.º 12
0
 def test_utils_3D_coords(self):
     """Check that 3D conversion yields at least one non-zero third coordinate."""
     def third_column(block):
         # Lines 4..24 are read as coordinate rows (presumably the atom
         # block; confirm against the output format); the third
         # whitespace-separated field of each row is parsed as a float.
         rows = block.split('\n')[4:25]
         return [float(row.split()[2]) for row in rows]

     aspirin_smiles = 'O=C(Oc1ccccc1C(=O)O)C'
     from_smiles = utils.smiles23D(aspirin_smiles)
     self.assertTrue(any(third_column(from_smiles)))
     from_ctab = utils.ctab23D(utils.smiles2ctab(aspirin_smiles))
     self.assertTrue(any(third_column(from_ctab)))
Ejemplo n.º 13
0
 def test_utils_format_conversion(self):
     """Round-trip aspirin through ctab, SMILES, InChI and InChI key."""
     def smiles_token(ctab_text):
         # Third whitespace-separated token of the ctab2smiles output.
         return utils.ctab2smiles(ctab_text).split()[2]

     aspirin_smiles = 'O=C(Oc1ccccc1C(=O)O)C'
     first_ctab = utils.smiles2ctab(aspirin_smiles)
     self.assertIsNotNone(first_ctab)
     canonical = smiles_token(first_ctab)
     self.assertEqual(canonical, 'CC(=O)Oc1ccccc1C(=O)O')

     as_inchi = utils.ctab2inchi(first_ctab)
     self.assertEqual(as_inchi, 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)')
     self.assertEqual(utils.inchi2inchiKey(as_inchi), 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N')

     # Converting the InChI back must give the same SMILES token.
     second_ctab = utils.inchi2ctab(as_inchi)
     self.assertIsNotNone(second_ctab)
     self.assertEqual(canonical, smiles_token(second_ctab))
Ejemplo n.º 14
0
 def test_utils_calculations(self):
     """Check individual property calls against the combined descriptors call."""
     def first_value(payload):
         # Every call returns JSON whose first element is the value used.
         return json.loads(payload)[0]

     ctab = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')

     atom_count = first_value(utils.getNumAtoms(ctab))
     self.assertEqual(atom_count, 13)
     weight = first_value(utils.molWt(ctab))
     self.assertAlmostEqual(weight, 180.159, 3)
     partition = first_value(utils.logP(ctab))
     self.assertAlmostEqual(partition, 1.31, 2)
     surface = first_value(utils.tpsa(ctab))
     self.assertAlmostEqual(surface, 63.6, 1)

     # The combined record must agree with the individual calls above.
     record = first_value(utils.descriptors(ctab))
     self.assertEqual(record['MolecularFormula'], 'C9H8O4')
     self.assertEqual(record['RingCount'], 1)
     self.assertEqual(record['NumRotatableBonds'], 3)
     self.assertEqual(record['HeavyAtomCount'], atom_count)
     self.assertAlmostEqual(weight, record['MolWt'], 3)
     self.assertAlmostEqual(partition, record['MolLogP'], 2)
     self.assertAlmostEqual(surface, record['TPSA'], 1)
    def test_compute_molecular_descriptors(self):
        """Check that aspirin's computed properties fall in the expected ranges."""
        def first_value(payload):
            # Each utils call returns JSON; only element 0 is inspected.
            return json.loads(payload)[0]

        ctab = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')

        atom_count = first_value(utils.getNumAtoms(ctab))
        self.assertEqual(atom_count, 13)

        weight = first_value(utils.molWt(ctab))
        self.assertTrue(weight > 180 and weight < 181)

        partition = first_value(utils.logP(ctab))
        self.assertTrue(partition > 1.31 and partition < 1.32)

        surface = first_value(utils.tpsa(ctab))
        self.assertTrue(surface > 63 and surface < 64)

        record = first_value(utils.descriptors(ctab))
        self.assertEqual(record['MolecularFormula'], 'C9H8O4')
Ejemplo n.º 16
0
def get_fingerprint(smile_formula):
    """Compute the fingerprint output for a SMILES string.

    The SMILES is converted with ``utils.smiles2ctab`` and the result is
    passed to ``utils.sdf2fps``. The original author's note says the
    fingerprint "appears to be the unique id" of the compound; treat that
    as unconfirmed.

    Args:
        smile_formula: SMILES string of the compound.

    Returns:
        Whatever ``utils.sdf2fps`` produces for the converted compound.
        The value used to be computed and then discarded, so the function
        always returned ``None``.
    """
    from chembl_webresource_client.utils import utils
    compound = utils.smiles2ctab(smile_formula)
    return utils.sdf2fps(compound)
Ejemplo n.º 17
0
 def test_utils_mcs(self):
     """Check the pattern string ``utils.mcs`` returns for three SMILES inputs."""
     inputs = (
         "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C",
         "CC(C)CCCCCC(=O)NCC1=CC(=C(C=C1)O)OC",
         "c1(C=O)cc(OC)c(O)cc1",
     )
     # Convert each SMILES separately, then concatenate into one input.
     combined = ''.join(utils.smiles2ctab(entry) for entry in inputs)
     expected = '[#6]-[#6]:1:[#6]:[#6](:[#6](:[#6]:[#6]:1)-[#8])-[#8]-[#6]'
     self.assertEqual(utils.mcs(combined), expected)
Ejemplo n.º 18
0
def get_biggest_common_structure(smile_formulas):
    """Compute the common substructure of several SMILES strings.

    Each SMILES is converted with ``utils.smiles2ctab``, the converted
    pieces are concatenated into one input, and that input is handed to
    ``utils.mcs``.

    Args:
        smile_formulas: Iterable of SMILES strings.

    Returns:
        The value returned by ``utils.mcs``. The result used to be computed
        and then dropped, so the function always returned ``None``.
    """
    from chembl_webresource_client.utils import utils
    sdf = ''.join(utils.smiles2ctab(smile) for smile in smile_formulas)
    return utils.mcs(sdf)
    def test_covert_smiles_to_ctab(self):
        """Check that the ctab generated for aspirin contains the 'V2000' marker."""
        ctab = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')
        has_marker = 'V2000' in ctab
        self.assertTrue(has_marker)
    def test_compute_fingerprints(self):
        """Check that the fingerprint output for aspirin starts with '#FPS'."""
        ctab = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')
        output = utils.sdf2fps(ctab)
        starts_with_header = output.startswith('#FPS')
        self.assertTrue(starts_with_header)
Ejemplo n.º 21
0
def standardize_compound(smile_formula):
    """Return the ``utils.standardise`` result for a SMILES string.

    Args:
        smile_formula: SMILES string of the compound.

    Returns:
        The output of ``utils.standardise`` applied to the
        ``utils.smiles2ctab`` conversion of ``smile_formula``.
    """
    return utils.standardise(utils.smiles2ctab(smile_formula))
    def test_standardise_molecule(self):
        """Check that the standardised output contains the 'V2000' marker."""
        source = utils.smiles2ctab("[Na]OC(=O)Cc1ccc(C[NH3+])cc1.c1nnn[n-]1.O")
        standardised = utils.standardise(source)
        has_marker = 'V2000' in standardised
        self.assertTrue(has_marker)