def ligands(pdb: Pdb, ligand_expo: Dict[str, Mol]) -> List[Ligand]:
    """Collect the ligands of a pdb that are known to ligand expo

    For every generic molecule of the first model a ligand expo identifier
    ``<lowercase pdb code>_<name>_1_<first char of molecule id>_<rest of molecule id>``
    is built. Molecules whose identifier is missing from ``ligand_expo``
    are skipped with a warning.

    Args:
        pdb: The pdb
        ligand_expo: Dictionary with molecules of ligand expo

    Raises:
        NoLigands: When none of the molecules could be turned into a ligand

    Returns:
        List of ligands, ordered by name

    """
    found = {}
    for molecule in pdb.model().molecules(generic=True):
        mol_id = molecule.molecule_id()
        expo_id = '_'.join((
            pdb.code().lower(),
            molecule.name(),
            '1',
            mol_id[0],
            mol_id[1:],
        ))
        try:
            # A KeyError from anywhere in this block causes the molecule to be skipped
            protonated = protonate_molecule(ligand_expo[expo_id])
            found[expo_id] = Ligand(molecule, protonated)
        except KeyError:
            logger.warning(
                'Unable to find {0} in ligand expo db, skipping'.format(expo_id))
    if len(found) == 0:
        raise NoLigands()
    return sorted(found.values(), key=lambda ligand: ligand.name())
def ligands(pdb: Pdb, ligand_expo: Dict[str, Mol]) -> List[Ligand]:
    """Collect the ligands of a pdb that are known to ligand expo

    For every generic molecule of the first model a ligand expo identifier
    ``<lowercase pdb code>_<name>_1_<first char of molecule id>_<rest of molecule id>``
    is built. Molecules whose identifier is missing from ``ligand_expo``
    are skipped with a warning.

    Args:
        pdb: The pdb
        ligand_expo: Dictionary with molecules of ligand expo

    Raises:
        NoLigands: When none of the molecules could be turned into a ligand

    Returns:
        List of ligands, ordered by name

    """
    found = {}
    for molecule in pdb.model().molecules(generic=True):
        mol_id = molecule.molecule_id()
        # Workaround for atomium 0.8 as to handle molecules with resID 0
        # NOTE: was fixed in 0.11.1, but other changes break kripo
        if not mol_id[1:]:
            # molecule_id() is not a setter, so patch the private prop
            # and read the id back
            molecule._id = mol_id + '0'
            mol_id = molecule.molecule_id()
        expo_id = '_'.join((
            pdb.code().lower(),
            molecule.name(),
            '1',
            mol_id[0],
            mol_id[1:],
        ))
        try:
            # A KeyError from anywhere in this block causes the molecule to be skipped
            protonated = protonate_molecule(ligand_expo[expo_id])
            found[expo_id] = Ligand(molecule, protonated)
        except KeyError:
            logger.warning(
                'Unable to find {0} in ligand expo db, skipping'.format(expo_id))
    if len(found) == 0:
        raise NoLigands()
    return sorted(found.values(), key=lambda ligand: ligand.name())
def test_can_get_string_from_pdb(self, mock_string, mock_dict):
    """to_file_string() feeds the Pdb to the dict mock and its result to the string mock.

    The value returned by the string mock must be returned unchanged.
    """
    intermediate = Mock()
    mock_dict.return_value = intermediate
    mock_string.return_value = "filecontents"
    entry = Pdb()
    result = entry.to_file_string()
    mock_dict.assert_called_with(entry)
    mock_string.assert_called_with(intermediate)
    self.assertEqual(result, "filecontents")
def fill_serial_numbers(pdb: Pdb):
    """Give serial numbers to hydrogens lacking one and bond them to heavy atoms

    The reduce program can add hydrogens to ligands, those hydrogens will
    have no atom serial numbers. RDKit will give parse error on a PDB block
    with atoms without an atom serial number.

    This method adds serial ids, continuing after the highest serial number
    present in the model, and bonds those hydrogens to their heavy atom
    based on its name: a hydrogen is bonded to every heavy atom of the same
    molecule whose second name character equals the second name character
    of the hydrogen.

    Hydrogens for which no heavy atom matches, including hydrogens with a
    name shorter than two characters, are reported with a warning.

    Changes are made in-place.

    Args:
        pdb: The pdb to fill serial ids in

    """
    model = pdb.model()
    # default=0 keeps a model without any numbered atom from raising ValueError
    max_serial_number = max(
        (a.atom_id() for a in model.atoms() if a.atom_id()), default=0)
    for mol in model.molecules(generic=True):
        for a in mol.atoms(element='H'):
            if a.atom_id() != 0:
                # Already has a serial number, nothing to fix
                continue
            max_serial_number += 1
            # a.atom_id() is not a setter, so set it using private prop
            a._id = max_serial_number
            hname = a.name()
            # A one character name (eg. 'H') has no group character to match
            # on, treat it as unbindable instead of raising IndexError
            hgrp = hname[1] if len(hname) > 1 else None
            bonded = False
            if hgrp is not None:
                for heavy in mol.atoms(exclude='H'):
                    oname = heavy.name()
                    if len(oname) > 1 and oname[1] == hgrp:
                        logger.info('Binding {0}:{1} with {2}:{3}'.format(
                            a.atom_id(), a.name(),
                            heavy.atom_id(), heavy.name()))
                        heavy.bond(a)
                        bonded = True
            if not bonded:
                logger.warning('Unable to bind {0}:{1} to heavy atom'.format(
                    a.atom_id(), a.name()))
def remove_unwanted_molecules(pdb: Pdb):
    """Remove unwanted generic molecules from the first model

    Molecules are visited in order of their molecule id. A molecule is
    removed when it:

    * Has a name in the UNWANTED_HETEROS list
    * Has a mass that is not strictly between LIGAND_MIN_MASS and
      LIGAND_MAX_MASS
    * Has a name that was already kept (aka removes duplicates)

    Removing is done in-place.

    Args:
        pdb: Atomium PDB entry containing possible unwanted molecules

    """
    model = pdb.model()
    kept_names = set()
    # Iterate over a sorted copy so molecules can be removed from the model
    candidates = sorted(model.molecules(generic=True),
                        key=lambda candidate: candidate.molecule_id())
    for candidate in candidates:
        wanted = candidate.name() not in UNWANTED_HETEROS
        mass_ok = LIGAND_MIN_MASS < candidate.mass() < LIGAND_MAX_MASS
        is_new = candidate.name() not in kept_names
        if wanted and mass_ok and is_new:
            kept_names.add(candidate.name())
        else:
            model.remove_molecule(candidate)
def test_can_create_pdb(self):
    """A freshly created Pdb has no models and all metadata fields set to None."""
    entry = Pdb()
    self.assertEqual(entry._models, [])
    unset_fields = (
        "_code",
        "_deposition_date",
        "_title",
        "_resolution",
        "_organism",
        "_expression_system",
        "_technique",
        "_classification",
        "_rfactor",
    )
    for field in unset_fields:
        self.assertEqual(getattr(entry, field), None)
def protonate_pdb(pdb: Pdb) -> Pdb:
    """Hydrogenate a pdb

    1. Render the pdb to its file string
    2. Pass the whole string to protonate_protein
    3. Parse the first item of the protonate_protein result back into a pdb
    4. Pass the parsed pdb to fill_serial_numbers

    Args:
        pdb: The pdb to hydrogenate

    Returns:
        A pdb with hydrogens added

    """
    protonation_result = protonate_protein(pdb.to_file_string())
    # Only the first item of the result is used as the protonated block
    hydrogenated_dict = pdb_string_to_pdb_dict(protonation_result[0])
    hydrogenated = pdb_dict_to_pdb(hydrogenated_dict)
    fill_serial_numbers(hydrogenated)
    return hydrogenated
def remove_non_contacting_molecules(pdb: Pdb):
    """Remove generic molecules which are not in contact with the protein

    Molecules of the first model are visited in order of their molecule id
    and each one for which ligand_contacts_protein reports no contact is
    removed from the model.

    Removing is done in-place.

    Args:
        pdb: Atomium PDB entry containing possible unwanted molecules

    """
    model = pdb.model()
    # Iterate over a sorted copy so molecules can be removed from the model
    candidates = sorted(model.molecules(generic=True),
                        key=lambda candidate: candidate.molecule_id())
    for candidate in candidates:
        if ligand_contacts_protein(candidate, model):
            continue
        try:
            model.remove_molecule(candidate)
        except KeyError:
            # in 1efr was unable to delete atom with key 22969, ignore error
            pass
def test_can_update_code(self):
    """Assigning to code replaces the stored _code value."""
    entry = Pdb()
    entry._code = "1xxx"
    new_code = "2yyy"
    entry.code = new_code
    self.assertEqual(entry._code, "2yyy")
def test_can_get_no_model(self):
    """The model attribute of a freshly created Pdb is None."""
    entry = Pdb()
    first_model = entry.model
    self.assertIsNone(first_model)
def test_can_get_pdb_code(self):
    """Reading code gives back the very object stored in _code."""
    entry = Pdb()
    entry._code = "1xxx"
    stored = entry._code
    self.assertIs(stored, entry.code)
def test_can_get_pdb_models(self):
    """Reading models gives the stored models as a tuple."""
    entry = Pdb()
    entry._models = ["1", "2", "3"]
    expected = ("1", "2", "3")
    self.assertEqual(entry.models, expected)
def test_model_gets_first_model(self):
    """Reading model gives the first of the stored models."""
    entry = Pdb()
    entry._models = ["1", "2", "3"]
    first_model = entry.model
    self.assertEqual(first_model, "1")
def test_classification_must_be_str(self):
    """Assigning an int to classification raises TypeError."""
    entry = Pdb()
    not_a_string = 100
    with self.assertRaises(TypeError):
        entry.classification = not_a_string
def test_resolution_must_be_number(self):
    """Assigning a str to resolution raises TypeError, an int is accepted."""
    entry = Pdb()
    not_a_number = "100"
    with self.assertRaises(TypeError):
        entry.resolution = not_a_number
    # Must not raise
    entry.resolution = 3
def test_can_get_pdb_resolution(self):
    """Reading resolution gives back the very object stored in _resolution."""
    entry = Pdb()
    entry._resolution = 1.2
    stored = entry._resolution
    self.assertIs(stored, entry.resolution)
def test_pdb_repr_one_model(self):
    """str() of a Pdb with one model and no code uses the singular 'model'."""
    entry = Pdb()
    entry._models = ["1"]
    text = str(entry)
    self.assertEqual(text, "<Pdb (1 model)>")
def test_pdb_repr_no_models(self):
    """str() of a freshly created Pdb reports zero models."""
    text = str(Pdb())
    self.assertEqual(text, "<Pdb (0 models)>")
def test_can_update_date(self):
    """Assigning a date to deposition_date replaces the stored _deposition_date."""
    entry = Pdb()
    entry._deposition_date = "date"
    new_date = datetime(2017, 9, 21).date()
    entry.deposition_date = new_date
    self.assertEqual(entry._deposition_date, datetime(2017, 9, 21).date())
def test_rfactor_must_be_number(self):
    """Assigning a str to rfactor raises TypeError, an int is accepted."""
    entry = Pdb()
    not_a_number = "100"
    with self.assertRaises(TypeError):
        entry.rfactor = not_a_number
    # Must not raise
    entry.rfactor = 3
def test_can_update_rfactor(self):
    """Assigning to rfactor replaces the stored _rfactor value."""
    entry = Pdb()
    entry._rfactor = 1.2
    new_rfactor = 1.5
    entry.rfactor = new_rfactor
    self.assertEqual(entry._rfactor, 1.5)
def test_can_get_pdb_rfactor(self):
    """Reading rfactor gives back the very object stored in _rfactor."""
    entry = Pdb()
    entry._rfactor = 1.2
    stored = entry._rfactor
    self.assertIs(stored, entry.rfactor)
def test_code_must_be_str(self):
    """Assigning an int to code raises TypeError."""
    entry = Pdb()
    not_a_string = 100
    with self.assertRaises(TypeError):
        entry.code = not_a_string
def test_pdb_repr_multiple_models(self):
    """str() of a Pdb with three models and no code uses the plural 'models'."""
    entry = Pdb()
    entry._models = ["1", "2", "3"]
    text = str(entry)
    self.assertEqual(text, "<Pdb (3 models)>")
def test_code_must_be_valid(self):
    """Assigning a five or a three character code raises ValueError."""
    entry = Pdb()
    too_long = "1xxxx"
    too_short = "1xx"
    with self.assertRaises(ValueError):
        entry.code = too_long
    with self.assertRaises(ValueError):
        entry.code = too_short
def test_pdb_repr_with_code(self):
    """str() of a Pdb with a code shows the code before the model count."""
    entry = Pdb()
    entry._models = ["1", "2", "3"]
    entry._code = "1XXX"
    text = str(entry)
    self.assertEqual(text, "<Pdb 1XXX (3 models)>")
def test_can_get_pdb_date(self):
    """Reading deposition_date gives back the very object stored in _deposition_date."""
    entry = Pdb()
    entry._deposition_date = "date"
    stored = entry._deposition_date
    self.assertIs(stored, entry.deposition_date)
def test_can_update_resolution(self):
    """Assigning to resolution replaces the stored _resolution value."""
    entry = Pdb()
    entry._resolution = 1.2
    new_resolution = 1.5
    entry.resolution = new_resolution
    self.assertEqual(entry._resolution, 1.5)
def test_date_must_be_date(self):
    """Assigning a str to deposition_date raises TypeError."""
    entry = Pdb()
    not_a_date = "date"
    with self.assertRaises(TypeError):
        entry.deposition_date = not_a_date
def test_can_save_pdb_to_file(self, mock_string, mock_save):
    """save() passes the value returned by the string mock and the path to the save mock."""
    mock_string.return_value = "filestring"
    entry = Pdb()
    target_path = "test.pdb"
    entry.save(target_path)
    mock_save.assert_called_with("filestring", "test.pdb")