def preprocess_smi(smi):
    """Clean a SMILES string by running it through four sequential filters.

    The input is canonicalized, stripped of salts, uncharged, and finally
    converted to its canonical tautomer.

    :param smi: SMILES string to clean.
    :return: the cleaned SMILES string, or ``None`` when ``smi`` cannot be
        parsed or canonicalized in the first filter.
    """
    # Filter 1- Convert to Canonical Smiles
    try:
        mol = Chem.MolFromSmiles(smi)
        # An unparsable SMILES is reported by a None result, not by an
        # exception, so test for it explicitly instead of relying on the
        # follow-up call blowing up.
        if mol is None:
            return None
        can_smi = Chem.MolToSmiles(mol, True)
    except Exception:
        # Covers non-string input and RDKit conversion errors without
        # swallowing KeyboardInterrupt/SystemExit like a bare ``except``.
        return None

    # Filter 2- Remove salt
    remover = SaltRemover()
    mol = Chem.MolFromSmiles(can_smi)
    # dontRemoveEverything=True asks the remover not to strip the molecule
    # down to nothing.
    res, deleted = remover.StripMolWithDeleted(mol, dontRemoveEverything=True)
    removed_salt_smi = Chem.MolToSmiles(res)

    # Filter 3- Remove Charge
    uncharger = rdMolStandardize.Uncharger()
    m = Chem.MolFromSmiles(removed_salt_smi)
    p = uncharger.uncharge(m)
    uncharged_smi = Chem.MolToSmiles(p)

    # Filter 4 - Standardize the tautomer
    clean_smi = MolStandardize.canonicalize_tautomer_smiles(uncharged_smi)
    return clean_smi
def test_withDontRemoveEverything(self):
    # With dontRemoveEverything=True, stripping a molecule that contains no
    # salt must delete nothing and hand back the input molecule.
    here = os.path.dirname(os.path.abspath(__file__))
    salts_path = os.sep.join([here, 'test_data', 'witch-salts.sdf'])
    salt_remover = SaltRemover(defnFilename=salts_path, defnFormat=InputFormat.MOL)

    m = Chem.MolFromSmiles('Cc1ccccc1')
    stripped, removed = salt_remover.StripMolWithDeleted(m, dontRemoveEverything=True)

    # List should be empty
    self.assertFalse(removed)
    self.assertEqual(m, stripped)
def test_withSdfFile(self):
    # Salt definitions are loaded from an SDF file; stripping must then
    # remove only the sodium counter-ion and report it as deleted.
    here = os.path.dirname(os.path.abspath(__file__))
    salts_path = os.sep.join([here, 'test_data', 'witch-salts.sdf'])
    salt_remover = SaltRemover(defnFilename=salts_path, defnFormat=InputFormat.MOL)
    self.assertEqual(len(salt_remover.salts), 240)

    m = Chem.MolFromSmiles("Cc1onc(-c2ccccc2)c1C([O-])=NC1C(=O)N2C1SC(C)(C)C2C(=O)O.O.[Na+]")
    # The result exposes the stripped molecule as .mol and the matched salt
    # definitions as .deleted.
    outcome = salt_remover.StripMolWithDeleted(m)

    self.assertEqual(
        Chem.MolToSmiles(outcome.mol),
        'Cc1onc(-c2ccccc2)c1C([O-])=NC1C(=O)N2C1SC(C)(C)C2C(=O)O.O')
    self.assertEqual(len(outcome.deleted), 1)
    self.assertEqual(Chem.MolToSmiles(outcome.deleted[0]), '[Na+]')
def check_salt(self, molecule: str, subType: str) -> str:
    """
    Report whether salt stripping removes anything from ``self.smiles_mol``.

    :param molecule: not read by this method; the molecule examined is
        ``self.smiles_mol``.
    :param subType: prefix used to build the returned label.
    :return salt: ``'<subType>_salt'`` when at least one salt definition was
        deleted, otherwise ``None``.
    """
    _, removed = SaltRemover().StripMolWithDeleted(self.smiles_mol)
    if not removed:
        return None
    return '_'.join((subType, 'salt'))
def test_SmilesVsSmarts(self):
    # The same salt definition is exercised twice: first supplied in the
    # default format (SMARTS), then explicitly as SMILES.
    halide_input = 'CN(Br)Cl.Cl'
    expected_parent = 'CN(Cl)Br'

    # SMARTS
    smarts_remover = SaltRemover(defnData="[Cl,Br]")
    stripped = smarts_remover.StripMol(Chem.MolFromSmiles(halide_input))
    self.assertEqual(stripped.GetNumAtoms(), 4)
    self.assertEqual(Chem.MolToSmiles(stripped), expected_parent)

    amine = Chem.MolFromSmiles('CN(C)C.Cl.Br')
    stripped, removed = smarts_remover.StripMolWithDeleted(amine)
    self.assertEqual(Chem.MolToSmiles(stripped), 'CN(C)C')
    # Because we read in SMARTS, we should output as well. Otherwise, we will have
    # mismatches
    removed_smarts = list(map(Chem.MolToSmarts, removed))
    self.assertListEqual(removed_smarts, ['[Cl,Br]'])

    # SMILES
    smiles_remover = SaltRemover(defnData="Cl", defnFormat=InputFormat.SMILES)
    stripped = smiles_remover.StripMol(Chem.MolFromSmiles(halide_input))
    self.assertEqual(stripped.GetNumAtoms(), 4)
    self.assertEqual(Chem.MolToSmiles(stripped), expected_parent)
def NeutraliseCharges_RemoveSalt(smiles, reactions=None):
    """Strip salts from a SMILES string and neutralise charged groups.

    :param smiles: input SMILES string.
    :param reactions: optional iterable of ``(reactant, product)`` pattern
        pairs applied by substructure replacement. When ``None``, the
        module-level ``_reactions`` cache is used; it is filled on first use
        by ``_InitialiseNeutralisationReactions()``.
    :return: a ``(smiles_out, neutralised)`` tuple. ``neutralised`` is
        ``True`` only when at least one replacement was applied.
        ``smiles_out`` is ``None`` when ``smiles`` cannot be parsed, the
        processed molecule's SMILES when something was neutralised or a salt
        was stripped, and the unchanged input otherwise.
    """
    global _reactions
    if reactions is None:
        if _reactions is None:
            _reactions = _InitialiseNeutralisationReactions()
        reactions = _reactions

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return (None, False)

    mol, deleted = SaltRemover().StripMolWithDeleted(mol)

    replaced = False
    for reactant, product in reactions:
        # Re-apply each pattern until it no longer matches.
        while mol.HasSubstructMatch(reactant):
            replaced = True
            mol = AllChem.ReplaceSubstructs(mol, reactant, product)[0]

    if replaced:
        return (Chem.MolToSmiles(mol, True), True)
    # Previously the salted input was returned whenever nothing was
    # neutralised, silently discarding the salt stripping done above. Keep
    # the stripped structure unless stripping left no atoms at all.
    if deleted and mol.GetNumAtoms() > 0:
        return (Chem.MolToSmiles(mol, True), False)
    return (smiles, False)
if __name__ == '__main__': if len(sys.argv) != 3: print('Usage: python rdkit_hlogp_batch.py <smiles> <batch_size>') exit() BATCH_SIZE = int(sys.argv[2]) hlogp_list = list() with open(sys.argv[1]) as smiles_file: file_lines = smiles_file.readlines() for line in file_lines: if line.strip(): smiles, cid = str(line).strip().split()[:2] mol = MolFromSmiles(smiles) remover = SaltRemover() res, deleted = remover.StripMolWithDeleted(mol) if res is not None: res.SetProp('_Name', cid) logp = MolLogP(res) num_heavy_atoms = res.GetNumHeavyAtoms() if num_heavy_atoms > 99: num_heavy_atoms = 99 scaled_logp = scale_logp_value(logp) if logp < 0.0: sign = 'M' #remove the minus sign so it's not printed scaled_logp = scaled_logp * -1 else: sign = 'P' key_string = 'H{:02}{}{:03}'.format(num_heavy_atoms, sign, scaled_logp)