def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize a ligand/protein pair and the complex formed by merging them.

    Both files are loaded through ``rdkit_util.load_molecule`` (sanitized,
    without adding hydrogens), merged into a single system molecule, passed
    through ``self._strip_hydrogens`` and finally through
    ``self.featurize_mol`` with the matching atom budget
    (``self.frag1_num_atoms``, ``self.frag2_num_atoms``,
    ``self.complex_num_atoms``).

    Parameters
    ----------
    mol_pdb_file
        File handed to ``load_molecule`` with ``is_protein=False``.
    protein_pdb_file
        File handed to ``load_molecule`` with ``is_protein=True``.

    Returns
    -------
    tuple or None
        A 9-tuple ``(coords, neighbor_list, z)`` for fragment 1, fragment 2
        and the merged system, in that order. ``None`` is returned when
        loading raises ``MoleculeLoadException`` or when featurization
        raises ``ValueError``; a warning is logged in both cases.
    """
    load_options = {"sanitize": True, "add_hydrogens": False}
    try:
        ligand_xyz, ligand_mol = rdkit_util.load_molecule(
            mol_pdb_file, is_protein=False, **load_options)
        protein_xyz, protein_mol = rdkit_util.load_molecule(
            protein_pdb_file, is_protein=True, **load_options)
    except MoleculeLoadException:
        # A load failure is currently reported to the caller as None.
        # TODO: Is there a better handling procedure?
        logging.warning("Some molecules cannot be loaded by Rdkit. Skipping")
        return None

    # The complex is assembled from the fragments as loaded, i.e. before
    # any of the three molecules goes through _strip_hydrogens.
    complex_mol = rdkit_util.merge_molecules([ligand_mol, protein_mol])
    complex_xyz = rdkit_util.get_xyz_from_mol(complex_mol)

    ligand_xyz, ligand_mol = self._strip_hydrogens(ligand_xyz, ligand_mol)
    protein_xyz, protein_mol = self._strip_hydrogens(protein_xyz, protein_mol)
    complex_xyz, complex_mol = self._strip_hydrogens(complex_xyz, complex_mol)

    try:
        ligand_xyz, ligand_nbrs, ligand_z = self.featurize_mol(
            ligand_xyz, ligand_mol, self.frag1_num_atoms)
        protein_xyz, protein_nbrs, protein_z = self.featurize_mol(
            protein_xyz, protein_mol, self.frag2_num_atoms)
        complex_xyz, complex_nbrs, complex_z = self.featurize_mol(
            complex_xyz, complex_mol, self.complex_num_atoms)
    except ValueError:
        logging.warning(
            "max_atoms was set too low. Some complexes too large and skipped")
        return None

    return (
        ligand_xyz, ligand_nbrs, ligand_z,
        protein_xyz, protein_nbrs, protein_z,
        complex_xyz, complex_nbrs, complex_z,
    )
def test_merge_molecules(self):
    """Check that merging two molecules preserves the total atom count.

    Loads ``1jld_ligand.sdf`` (path relative to this test file) and the
    ligand referenced by ``self.ligand_file``, merges them with
    ``rdkit_util.merge_molecules`` and asserts that the merged molecule has
    as many atoms as both inputs combined.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    first_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")

    # Only the molecule objects are needed; the coordinates are discarded.
    _, first_mol = rdkit_util.load_molecule(
        first_path, calc_charges=False, add_hydrogens=False)
    first_count = first_mol.GetNumAtoms()

    # self.ligand_file is for 3ws9_ligand.sdf
    _, second_mol = rdkit_util.load_molecule(
        self.ligand_file, calc_charges=False, add_hydrogens=False)
    second_count = second_mol.GetNumAtoms()

    combined = rdkit_util.merge_molecules([first_mol, second_mol])
    combined_count = combined.GetNumAtoms()
    assert combined_count == first_count + second_count
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize a ligand/protein pair and the complex formed by merging them.

    Both files are loaded with ``rdkit_util.load_molecule`` using its default
    options, merged into one system molecule, passed through
    ``self._strip_hydrogens`` and then through ``self.featurize_mol`` with
    the matching atom budget (``self.frag1_num_atoms``,
    ``self.frag2_num_atoms``, ``self.complex_num_atoms``).

    No exceptions are handled here; anything raised by the helpers
    propagates to the caller.

    Returns
    -------
    tuple
        A 9-tuple ``(coords, neighbor_list, z)`` for fragment 1, fragment 2
        and the merged system, in that order.
    """
    ligand_xyz, ligand_mol = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, protein_mol = rdkit_util.load_molecule(protein_pdb_file)

    # NOTE(review): merge_molecules receives two positional molecules here,
    # whereas other call sites in this file pass a single list -- confirm
    # which signature rdkit_util actually exposes.
    complex_mol = rdkit_util.merge_molecules(ligand_mol, protein_mol)
    complex_xyz = rdkit_util.get_xyz_from_mol(complex_mol)

    ligand_xyz, ligand_mol = self._strip_hydrogens(ligand_xyz, ligand_mol)
    protein_xyz, protein_mol = self._strip_hydrogens(protein_xyz, protein_mol)
    complex_xyz, complex_mol = self._strip_hydrogens(complex_xyz, complex_mol)

    ligand_xyz, ligand_nbrs, ligand_z = self.featurize_mol(
        ligand_xyz, ligand_mol, self.frag1_num_atoms)
    protein_xyz, protein_nbrs, protein_z = self.featurize_mol(
        protein_xyz, protein_mol, self.frag2_num_atoms)
    complex_xyz, complex_nbrs, complex_z = self.featurize_mol(
        complex_xyz, complex_mol, self.complex_num_atoms)

    return (
        ligand_xyz, ligand_nbrs, ligand_z,
        protein_xyz, protein_nbrs, protein_z,
        complex_xyz, complex_nbrs, complex_z,
    )