def test_custom_templates():
    """Test using custom templates.

    Runs ``PreparePDBMol`` with ``remove_incomplete=True`` for several ways
    of passing ``custom_templates`` (SMILES strings or RDKit molecules, with
    and without ``replace_default_templates``) and checks that no residue is
    dropped in any of them.
    """
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)

    templates = {
        'TYR': 'CCC(N)C=O',
        'LYS': 'NC(C(O)=O)CCCCN',
        'LEU': 'CC(C)CC(N)C(=O)O',
    }
    mol_templates = {resname: Chem.MolFromSmiles(smi)
                     for resname, smi in templates.items()}

    kwargs_variants = (
        {'custom_templates': {'TYR': 'CCC(N)C=O'}},
        {'custom_templates': {'TYR': Chem.MolFromSmiles('CCC(N)C=O')}},
        {'custom_templates': templates,
         'replace_default_templates': True},
        {'custom_templates': mol_templates,
         'replace_default_templates': True},
    )

    for kwargs in kwargs_variants:
        # use TYR without sidechain - all matches should be complete
        new_mol = PreparePDBMol(mol, remove_incomplete=True, **kwargs)
        assert new_mol.GetNumAtoms() == 23

        residues = {atom.GetPDBResidueInfo().GetResidueNumber()
                    for atom in new_mol.GetAtoms()}
        # Compare the sets for equality; the previous form
        # `assert residues, {...}` only checked that the set was non-empty.
        assert residues == {137, 138, 139}

        assert Chem.SanitizeMol(new_mol) == Chem.SanitizeFlags.SANITIZE_NONE
def test_add_missing_atoms():
    """Check that ``PreparePDBMol(add_missing_atoms=True)`` rebuilds atoms.

    Four inputs are exercised: a tryptophan lacking one atom, a tyrosine
    lacking its ring, a structure with missing protein backbone atoms and
    one with missing nucleotide backbone atoms.
    """

    def load_without_hs(filename, sanitize):
        # Parse the PDB file, then strip hydrogens without re-sanitizing.
        path = os.path.join(test_dir, filename)
        parsed = Chem.MolFromPDBFile(path, sanitize=sanitize)
        return Chem.RemoveHs(parsed, sanitize=False)

    # --- tryptophan: a single missing atom is added ---
    trp_mol = load_without_hs('5dhh_missingatomTRP.pdb', True)
    assert trp_mol.GetNumAtoms() == 26
    trp_mol = PreparePDBMol(trp_mol, add_missing_atoms=True)
    assert trp_mol.GetNumAtoms() == 27

    added = trp_mol.GetAtomWithIdx(21)
    assert added.GetAtomicNum() == 6
    res_info = added.GetPDBResidueInfo()
    assert res_info.GetResidueName() == 'TRP'
    assert res_info.GetResidueNumber() == 175
    assert res_info.GetName().strip() == 'C9'
    assert added.IsInRing()
    assert added.GetIsAromatic()
    assert Chem.SanitizeMol(trp_mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # --- tyrosine: the whole ring is added ---
    tyr_mol = load_without_hs('3cx9_TYR.pdb', True)
    assert tyr_mol.GetNumAtoms() == 23
    tyr_mol = PreparePDBMol(tyr_mol, add_missing_atoms=True)
    assert tyr_mol.GetNumAtoms() == 29

    added = tyr_mol.GetAtomWithIdx(17)
    assert added.GetAtomicNum() == 6
    res_info = added.GetPDBResidueInfo()
    assert res_info.GetResidueName() == 'TYR'
    assert res_info.GetResidueNumber() == 138
    assert res_info.GetName().strip() == 'C6'
    assert added.IsInRing()
    assert added.GetIsAromatic()
    assert Chem.SanitizeMol(tyr_mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # --- missing protein backbone atoms ---
    his_mol = load_without_hs('5ar7_HIS.pdb', False)
    assert his_mol.GetNumAtoms() == 21
    assert his_mol.GetNumBonds() == 19
    his_mol = PreparePDBMol(his_mol, add_missing_atoms=True)
    assert his_mol.GetNumAtoms() == 25
    assert his_mol.GetNumBonds() == 25

    # --- missing nucleotide backbone atoms ---
    nucleic_mol = load_without_hs('1bpx_missingBase.pdb', False)
    assert nucleic_mol.GetNumAtoms() == 301
    assert nucleic_mol.GetNumBonds() == 333
    nucleic_mol = PreparePDBMol(nucleic_mol, add_missing_atoms=True)
    assert nucleic_mol.GetNumAtoms() == 328
    assert nucleic_mol.GetNumBonds() == 366
def test_many_missing():
    """Test parsing residues with **many** missing atoms and bonds.

    The same PDB file is prepared twice: once with default options and once
    with ``remove_incomplete=True``.
    """
    pdb_path = os.path.join(test_dir, '2wb5_GLN.pdb')
    sanitize_ok = Chem.SanitizeFlags.SANITIZE_NONE

    # Default preparation keeps the five atoms present in the file.
    kept = PreparePDBMol(
        Chem.MolFromPDBFile(pdb_path, sanitize=False, removeHs=False))
    assert kept.GetNumAtoms() == 5
    assert Chem.SanitizeMol(kept) == sanitize_ok
    assert kept.GetAtomWithIdx(4).GetDegree() == 0

    # With removal enabled nothing is left.
    stripped = PreparePDBMol(
        Chem.MolFromPDBFile(pdb_path, sanitize=False, removeHs=False),
        remove_incomplete=True)
    assert stripped.GetNumAtoms() == 0
    assert Chem.SanitizeMol(stripped) == sanitize_ok
def test_remove_incomplete():
    """Test removing residues with missing atoms.

    Prepares the same molecule with ``remove_incomplete`` off and on, and
    checks the atom count and the exact set of residue numbers that remain
    in each case.
    """
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)

    # keep all residues
    new_mol = PreparePDBMol(mol, remove_incomplete=False)
    assert new_mol.GetNumAtoms() == 23
    residues = {atom.GetPDBResidueInfo().GetResidueNumber()
                for atom in new_mol.GetAtoms()}
    # Real equality check; `assert residues, {...}` only tested truthiness.
    assert residues == {137, 138, 139}
    assert Chem.SanitizeMol(new_mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # remove residue with missing sidechain
    new_mol = PreparePDBMol(mol, remove_incomplete=True)
    assert new_mol.GetNumAtoms() == 17
    residues = {atom.GetPDBResidueInfo().GetResidueNumber()
                for atom in new_mol.GetAtoms()}
    assert residues == {137, 139}
    assert Chem.SanitizeMol(new_mol) == Chem.SanitizeFlags.SANITIZE_NONE
def test_pocket_extractor():
    """Test extracting pocket and ligand.

    Covers three inputs: a plain complex at two cutoffs, a complex with a
    zinc ion (with and without ``expandResidues``), and a structure holding
    two ligands to exercise the ligand selection options.
    """

    def resname_at(molecule, idx):
        # Residue name recorded in the PDB info of the atom at ``idx``.
        return molecule.GetAtomWithIdx(idx).GetPDBResidueInfo().GetResidueName()

    sanitize_ok = Chem.SanitizeFlags.SANITIZE_NONE

    complex_mol = Chem.MolFromPDBFile(os.path.join(test_dir, '5ar7.pdb'),
                                      sanitize=False, removeHs=False)

    # cutoff=1. - nothing should be close enough to form a pocket
    pocket, ligand = ExtractPocketAndLigand(complex_mol, cutoff=1.)
    assert pocket.GetNumAtoms() == 0
    assert ligand.GetNumAtoms() == 26

    # cutoff=12. - pocket is populated, ligand is unchanged
    pocket, ligand = ExtractPocketAndLigand(complex_mol, cutoff=12.)
    assert pocket.GetNumAtoms() == 928
    assert ligand.GetNumAtoms() == 26

    # water (HOH) must be part of the pocket
    water_atom = pocket.GetAtomWithIdx(910)
    assert water_atom.GetAtomicNum() == 8
    assert water_atom.GetPDBResidueInfo().GetResidueName() == 'HOH'

    # prepare both parts and make sure they sanitize cleanly
    pocket = PreparePDBMol(pocket)
    ligand = PreparePDBMol(ligand)
    assert Chem.SanitizeMol(pocket) == sanitize_ok
    assert Chem.SanitizeMol(ligand) == sanitize_ok

    # check atom/bond properties for both molecules
    ring_bond = pocket.GetBondWithIdx(39)
    assert ring_bond.GetIsAromatic()
    begin_info = ring_bond.GetBeginAtom().GetPDBResidueInfo()
    assert begin_info.GetResidueName() == 'TYR'

    ligand_atom = ligand.GetAtomWithIdx(22)
    assert ligand_atom.GetAtomicNum() == 7
    assert ligand_atom.GetIsAromatic()
    assert ligand_atom.GetPDBResidueInfo().GetResidueName() == 'SR8'

    # metal ion must end up in the pocket
    zn_mol = Chem.MolFromPDBFile(os.path.join(test_dir, '4p6p_lig_zn.pdb'),
                                 sanitize=False, removeHs=False)
    assert zn_mol.GetNumAtoms() == 176

    pocket, ligand = ExtractPocketAndLigand(zn_mol, cutoff=5.)
    assert pocket.GetNumAtoms() == 162
    assert ligand.GetNumAtoms() == 14
    assert resname_at(pocket, 153).strip() == 'ZN'
    assert resname_at(pocket, 160) == 'HOH'

    pocket, ligand = ExtractPocketAndLigand(zn_mol, cutoff=5.,
                                            expandResidues=False)
    assert pocket.GetNumAtoms() == 74
    assert ligand.GetNumAtoms() == 14
    assert resname_at(pocket, 65).strip() == 'ZN'
    assert resname_at(pocket, 73) == 'HOH'

    # ligand and protein white/blacklist
    two_lig_mol = Chem.MolFromPDBFile(os.path.join(test_dir, '1dy3_2LIG.pdb'),
                                      sanitize=False, removeHs=False)

    # default behaviour picks the largest ligand - ATP
    pocket, ligand = ExtractPocketAndLigand(two_lig_mol, cutoff=20.)
    assert pocket.GetNumAtoms() == 304
    assert ligand.GetNumAtoms() == 31
    assert resname_at(ligand, 0) == 'ATP'

    # blacklist ATP to get the other largest ligand - 87Y
    pocket, ligand = ExtractPocketAndLigand(two_lig_mol, cutoff=20.,
                                            ligand_residue_blacklist=['ATP'])
    assert pocket.GetNumAtoms() == 304
    assert ligand.GetNumAtoms() == 23
    assert resname_at(ligand, 0) == '87Y'

    # point to 87Y explicitly
    pocket, ligand = ExtractPocketAndLigand(two_lig_mol, cutoff=20.,
                                            ligand_residue='87Y')
    assert pocket.GetNumAtoms() == 304
    assert ligand.GetNumAtoms() == 23
    assert resname_at(ligand, 0) == '87Y'

    # append ATP to the pocket so that 87Y becomes the ligand
    pocket, ligand = ExtractPocketAndLigand(two_lig_mol, cutoff=20.,
                                            append_residues=['ATP'])
    assert pocket.GetNumAtoms() == 304 + 31
    assert ligand.GetNumAtoms() == 23
    assert resname_at(ligand, 0) == '87Y'
    assert resname_at(pocket, 310) == 'ATP'