Esempio n. 1
0
def calc(smi, name):
    """Calculate a set of descriptors for a single molecule.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier; returned unchanged as the first item of the result
    :return: tuple (name, HBA, HBD, HBA + HBD, number of rings, rotatable bonds, TPSA, logP, MR, MW,
             Fsp3, FMF, QED, heavy atom count, fused ring count, unique HBD/HBA atom count,
             largest ring size, number of chiral centers, Fsp3 of the scaffold).
             TPSA, logP, MR and MW are rounded to 2 decimals; Fsp3, FMF, QED and scaffold Fsp3 to 3.
             None is returned (and a message is written to stderr) if the SMILES cannot be parsed
             or if any descriptor calculation raises an exception.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # newline added so that consecutive messages do not run together on stderr
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    # Exception (not a bare except) so that KeyboardInterrupt/SystemExit still stop a long batch run
    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # the scaffold is needed for both FMF and the scaffold Fsp3, so it is built only once
        scaffold = GetScaffoldForMol(m)
        # FMF: fraction of heavy atoms which belong to the scaffold; 0 for a molecule without heavy atoms
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # size of the largest ring, 0 for acyclic molecules
        max_ring_size = max(map(len, m.GetRingInfo().AtomRings()), default=0)
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused, n_unique_hba_hbd_atoms, \
               max_ring_size, n_chiral_centers, round(fcsp3_bm, 3)
    except Exception:
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
Esempio n. 2
0
def get_MCSs(test_mols, known_mols, nns_indices=None, murcko_scaff=False):
    """For every query molecule find the known molecule sharing the largest MCS with it.

    Each query is compared (via ``get_mcs``) with the known molecules selected by the
    corresponding entry of ``nns_indices``; the candidate whose MCS result has the highest
    ``numAtoms`` is kept (the first one in case of ties, as ``np.argmax`` does).

    :param test_mols: sequence of query molecules
    :param known_mols: sequence of reference molecules
    :param nns_indices: one index collection per query molecule, selecting the candidates
                        from ``known_mols``; by default every known molecule is a candidate
    :param murcko_scaff: if True, ``get_mcs`` receives a function which applies
                         ``GetScaffoldForMol``, otherwise an identity function
    :return: four lists aligned with ``test_mols``: the best MCS results, the substructure
             matches of the MCS in the query molecules, the selected known molecules and
             the substructure matches of the MCS in those known molecules
    """
    if nns_indices is None:
        every_known = np.arange(len(known_mols))
        nns_indices = [every_known] * len(test_mols)

    if murcko_scaff:
        def transform(mol):
            return GetScaffoldForMol(mol)
    else:
        def transform(mol):
            return mol

    # an array allows selecting the candidates of a query with an index collection
    known_arr = np.array(known_mols)

    best_mcs_list = []
    query_matches = []
    nearest_mols = []
    nearest_matches = []

    for query, candidate_idx in zip(test_mols, nns_indices):
        candidates = known_arr[candidate_idx]
        mcs_results = [get_mcs(query, candidate, transform) for candidate in tqdm(candidates)]

        # position of the candidate with the largest common substructure
        best = np.argmax([res.numAtoms for res in mcs_results])
        nearest = candidates[best]
        best_mcs = mcs_results[best]

        pattern = MolFromSmarts(best_mcs.smartsString)
        nearest_match = nearest.GetSubstructMatch(pattern)
        query_match = query.GetSubstructMatch(pattern)

        best_mcs_list.append(best_mcs)
        query_matches.append(query_match)
        nearest_mols.append(nearest)
        nearest_matches.append(nearest_match)

    return best_mcs_list, query_matches, nearest_mols, nearest_matches
Esempio n. 3
0
 def test2MurckoScaffold(self):
   # Run the scaffold generation over the larger test set (self.testMolecules2)
   for entry in self.testMolecules2:
     computed = Chem.MolToSmiles(GetScaffoldForMol(Chem.MolFromSmiles(entry.smiles)))
     # the reference scaffold is parsed and written back so that both strings
     # are produced by the same MolToSmiles call before being compared
     expected = Chem.MolToSmiles(Chem.MolFromSmiles(entry.scaffold))
     self.assertEqual(computed, expected)
Esempio n. 4
0
 def test_ReferenceImplementation(self):
     # The C++ implementation and the Python reference implementation
     # must give the same scaffold SMILES for every test molecule
     for entry in self.testMolecules:
         parsed = Chem.MolFromSmiles(entry.smiles)
         fromDefault = Chem.MolToSmiles(GetScaffoldForMol(parsed))
         fromReference = Chem.MolToSmiles(_pyGetScaffoldForMol(parsed))
         self.assertEqual(fromDefault, fromReference)
Esempio n. 5
0
def calc(smi, name):
    """Calculate a set of descriptors for a single molecule.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier; returned unchanged as the first item of the result
    :return: tuple (name, HBA, HBD, HBA + HBD, number of rings, rotatable bonds, TPSA, logP, MR, MW,
             Fsp3, FMF, QED, heavy atom count, fused ring count).
             TPSA, logP, MR and MW are rounded to 2 decimals; Fsp3, FMF and QED to 3.
             None is returned (and a message is written to stderr) if the SMILES cannot be parsed
             or if any descriptor calculation raises an exception.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # newline added so that consecutive messages do not run together on stderr
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    # Exception (not a bare except) so that KeyboardInterrupt/SystemExit still stop a long batch run
    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # FMF: fraction of heavy atoms which belong to the scaffold; 0 for a molecule without heavy atoms
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused
    except Exception:
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
Esempio n. 6
0
def calc(smi, name):
    """Return the scaffold of a molecule as non-isomeric SMILES.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier; returned unchanged as the first item of the result
    :return: tuple (name, scaffold SMILES written with isomericSmiles=False), or None
             (with a message written to stderr) if the SMILES cannot be parsed
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # newline added so that consecutive messages do not run together on stderr
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None
    scaff = Chem.MolToSmiles(GetScaffoldForMol(m), isomericSmiles=False)
    return name, scaff
def find_boundary_bonds(mol, scaffold_atom_indices=None):
    """Return the bonds which connect scaffold atoms with atoms outside the scaffold.

    :param mol: molecule providing GetAtomWithIdx (and GetSubstructMatch when the
                scaffold atoms have to be determined here)
    :param scaffold_atom_indices: indices of the scaffold atoms in ``mol``; if None or empty,
                                  they are taken from the substructure match of
                                  ``GetScaffoldForMol(mol)`` in ``mol``
    :return: list of bonds having exactly one end in the scaffold, ordered by scaffold atom
             (in the order of ``scaffold_atom_indices``) and then by the order of that atom's bonds
    """
    if not scaffold_atom_indices:
        scaffold = GetScaffoldForMol(mol)
        scaffold_atom_indices = mol.GetSubstructMatch(scaffold)
    # materialise once so that a one-shot iterable can be both iterated and tested for membership
    scaffold_atom_indices = tuple(scaffold_atom_indices)
    # set lookup is O(1); the tuple is still iterated below to keep the output order
    scaffold_atoms = set(scaffold_atom_indices)
    return [
        bond for atom_idx in scaffold_atom_indices
        for bond in mol.GetAtomWithIdx(atom_idx).GetBonds()
        if bond.GetOtherAtomIdx(atom_idx) not in scaffold_atoms
    ]
Esempio n. 8
0
def test_scheme_1():
    """SCHEME 1:

    Scheme 1 refers to Murcko scaffold construction where exocyclic double bonds and
    double bonds directly attached to the linker ("exolinker double bonds") are kept.
    """

    # Flucloxacillin
    flucloxacillin = 'CC1=C(C(=NO1)C2=C(C=CC=C2Cl)F)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O'
    expected = canon('O=C(NC1C(=O)N2CCSC12)c1conc1-c1ccccc1')

    parsed = Chem.MolFromSmiles(flucloxacillin)
    murcko = GetScaffoldForMol(parsed)
    scaffold = Scaffold(murcko)

    assert scaffold.smiles == expected
Esempio n. 9
0
def murcko_scaffold(smiles, generic, isomeric):
    """Return the canonical SMILES of the Murcko scaffold of a molecule.

    :param smiles: SMILES string of the molecule
    :param generic: bool; if True the scaffold is passed through MakeScaffoldGeneric
    :param isomeric: bool; passed to MolToSmiles as ``isomericSmiles``
    :return: SMILES string of the (optionally generic) scaffold
    :raises AssertionError: if ``generic`` or ``isomeric`` is not a bool
    """
    # imported here, not at module level, so rdkit is only loaded when the function is called
    from rdkit.Chem.Scaffolds import MurckoScaffold

    assert isinstance(generic, bool)
    assert isinstance(isomeric, bool)

    scaffold = MurckoScaffold.GetScaffoldForMol(MolFromSmiles(smiles))
    scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold) if generic else scaffold
    return MolToSmiles(scaffold, canonical=True, isomericSmiles=isomeric)
Esempio n. 10
0
def calc(smi, name):
    """Calculate a set of descriptors for a single molecule.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier; returned unchanged as the first item of the result
    :return: tuple (name, HBA, HBD, HBA + HBD, number of rings, rotatable bonds, TPSA, logP, MR, MW,
             Fsp3, FMF). TPSA, logP, MR and MW are rounded to 2 decimals; Fsp3 and FMF to 3.
             None is returned (and a message is written to stderr) if the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # newline added so that consecutive messages do not run together on stderr
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    hac = m.GetNumAtoms(onlyHeavy=True)
    # FMF: fraction of heavy atoms which belong to the scaffold.
    # A molecule without heavy atoms gets 0 instead of raising ZeroDivisionError.
    fmf = GetScaffoldForMol(m).GetNumAtoms(onlyHeavy=True) / hac if hac else 0
    return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
           round(csp3, 3), round(fmf, 3)
Esempio n. 11
0
def calcFingerprints(smiles, preproc=True):
	"""Return the Morgan fingerprint bits and the generic scaffold SMILES of a molecule.

	Results are memoised per SMILES in one of two module-level dicts:
	``global_mol_dict`` when ``preproc`` is True, ``global_mol_dict_no_preproc`` otherwise.
	Failures are memoised as None, so a repeated call fails without recomputation.

	:param smiles: SMILES string of the molecule
	:param preproc: if True the parsed molecule is passed through ``preprocessMolecule`` first
	:return: bitstring (list of 2048 ints, one per fingerprint bit) and scaffold SMILES;
	         a tuple when freshly computed, the cached two-item list on a cache hit
	:raises PreprocessViolation: if no usable molecule is obtained (now or in a cached call)
	"""
	# each mode has its own cache; only the dict which is actually needed is looked up
	cache = global_mol_dict if preproc else global_mol_dict_no_preproc

	try:
		precalculated = cache[smiles]
	except KeyError:
		pass  # not seen yet, computed below
	else:
		if precalculated is None:
			raise PreprocessViolation(' Molecule preprocessing violation')
		return precalculated

	m1 = Chem.MolFromSmiles(smiles)
	if preproc:
		m1 = preprocessMolecule(m1)
	if not m1:
		# the failure is stored in the cache of the current mode
		# (it used to be written to global_mol_dict even when preproc was False)
		cache[smiles] = None
		raise PreprocessViolation(' Molecule preprocessing violation')

	scaf = Chem.MolToSmiles(MakeScaffoldGeneric(GetScaffoldForMol(m1)))
	fp = AllChem.GetMorganFingerprintAsBitVect(m1, 2, nBits=2048)
	bitstring = list(map(int, fp.ToBitString()))
	cache[smiles] = [bitstring, scaf]
	return bitstring, scaf
Esempio n. 12
0
def getScaffold(mol):
    """Return the SMILES of the scaffold of ``mol``, or None if it cannot be generated.

    :param mol: molecule passed to GetScaffoldForMol
    :return: SMILES string of the scaffold, or None if scaffold generation or
             SMILES writing raises an exception
    """
    try:
        return Chem.MolToSmiles(GetScaffoldForMol(mol))
    except Exception:
        # best effort by design; KeyboardInterrupt/SystemExit are no longer swallowed
        return None