def score(self, ligands, protein=None):
    """Automated scoring procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to score. A single molecule is also accepted.

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : list of oddt.toolkit.Molecule objects
        The successfully scored input ligands (scores are stored in
        mol.data). Ligands for which Vina failed are left out when
        ``self.skip_bad_mols`` is true.

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If the Vina executable exits with an error and
        ``self.skip_bad_mols`` is false.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # The temporary directory must be removed on every exit path, including
    # a failed molecule check or a Vina failure that gets re-raised.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            try:
                scores = parse_vina_scoring_output(
                    subprocess.check_output([
                        self.executable,
                        '--score_only',
                        '--receptor', self.protein_file,
                        '--ligand', ligand_file
                    ] + self.params, stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # Drop undecodable bytes: the output is diagnostic only, and
                # a decode error here would hide the actual Vina failure.
                sys.stderr.write(e.output.decode('ascii', 'ignore'))
                if self.skip_bad_mols:
                    continue
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))
            ligand.data.update(scores)
            output_array.append(ligand)
    finally:
        rmtree(ligand_dir)
    return output_array
def score(self, ligands, protein=None):
    """Automated scoring procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to score. A single molecule is also accepted.

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : list of oddt.toolkit.Molecule objects
        The successfully scored input ligands (scores are stored in
        mol.data). Ligands for which Vina failed are left out when
        ``self.skip_bad_mols`` is true.

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If the Vina executable exits with an error and
        ``self.skip_bad_mols`` is false.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # Guarantee removal of the temporary directory even when a molecule
    # check fails or a Vina error is re-raised.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            try:
                scores = parse_vina_scoring_output(
                    subprocess.check_output([self.executable,
                                             '--score_only',
                                             '--receptor', self.protein_file,
                                             '--ligand', ligand_file] +
                                            self.params,
                                            stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # The captured output is only echoed for diagnostics, so
                # undecodable bytes are dropped rather than allowed to raise
                # and mask the Vina failure.
                sys.stderr.write(e.output.decode('ascii', 'ignore'))
                if self.skip_bad_mols:
                    continue
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))
            ligand.data.update(scores)
            output_array.append(ligand)
    finally:
        rmtree(ligand_dir)
    return output_array
def test_check_molecule():
    """Walk through the accept/reject cases of ``check_molecule``."""

    def expect_value_error(pattern, *args, **kwargs):
        # check_molecule(*args, **kwargs) must raise ValueError whose
        # message matches ``pattern``.
        assert_raises_regexp(ValueError, pattern, check_molecule,
                             *args, **kwargs)

    # Something that is not a molecule is rejected.
    expect_value_error('Molecule object', [])

    crystal_ligand = next(oddt.toolkit.readfile('sdf', xiap_crystal_ligand))
    check_molecule(crystal_ligand)

    # force protein
    receptor = next(oddt.toolkit.readfile('pdb', xiap_protein))
    expect_value_error('marked as a protein', receptor, force_protein=True)
    receptor.protein = True
    check_molecule(receptor, force_protein=True)

    # force coordinates
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    expect_value_error('3D coordinates', benzene, force_coords=True)
    benzene.make3D()
    check_molecule(benzene, force_coords=True)
    # assert_raises_regexp(ValueError, 'positional', check_molecule, benzene, True)

    # NOTE(review): the mol block below is reproduced exactly as it appears
    # in this view; confirm its line breaks against the upstream fixture.
    atomless_sdf = '''mol_title handmade 0 0 0 0 0 0 0 0 0 0999 V2000 M END '''
    atomless = oddt.toolkit.readstring('sdf', atomless_sdf)
    expect_value_error('has zero atoms', atomless, non_zero_atoms=True)
def diverse_conformers_generator(mol, n_conf=10, method='confab', seed=None,
                                 **kwargs):
    """Produce diverse conformers using current conformer as starting point.

    Despite the function name, the conformers are collected and returned as
    a list. Each conformer is a copy of the original molecule object.

    .. versionadded:: 0.6

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Molecule for which generating conformers

    n_conf : int (default=10)
        Target number of conformers; at most this many are returned.

    method : string (default='confab')
        Method for generating conformers. Supported methods:
        * confab
        * ga

    seed : None or int (default=None)
        Random seed

    mutability : int (default=5)
        The inverse of probability of mutation. By default 5, which translates
        to 1/5 (20%) chance of mutation. This setting only works with genetic
        algorithm method ("ga").

    convergence : int (default=5)
        The number of generations with unchanged fitness, should the algorithm
        converge. This setting only works with genetic algorithm method ("ga").

    rmsd : float (default=0.5)
        The conformers are pruned unless their RMSD is higher than this cutoff.
        This setting only works with Confab method ("confab").

    nconfs : int (default=10000)
        The number of initial conformers to generate before energy pruning.
        The spelling ``nconf`` is accepted as an alias; ``nconfs`` wins when
        both are given. This setting only works with Confab method
        ("confab").

    energy_gap : float (default=5000.)
        Energy gap from the lowest energy conformer to the highest possible.
        This setting only works with Confab method ("confab").

    Returns
    -------
    mols : list of oddt.toolkit.Molecule objects
        Molecules with diverse conformers

    Raises
    ------
    NotImplementedError
        If the OpenBabel version is older than 2.4.0.
    ValueError
        If ``method`` is not supported, or if "ga" is requested and
        OpenBabel lacks ``OBConformerSearch``.
    """
    # NOTE(review): this is a lexicographic string comparison; it would
    # misorder a hypothetical "2.10.x" release - confirm this is acceptable.
    if __version__ < '2.4.0':
        raise NotImplementedError('Diverse conformer generation is not '
                                  'implemented in OpenBabel before 2.4.0.')
    check_molecule(mol, force_coords=True)
    mol_clone = mol.clone
    if seed is not None:
        rand = ob.OBRandom(True)
        rand.Seed(seed)
    if method == 'ga':
        if not hasattr(ob, 'OBConformerSearch'):
            raise ValueError('OpenBabel needs to be compiled with eigen to '
                             'perform conformer search.')
        cs = ob.OBConformerSearch()
        cs.Setup(mol_clone.OBMol,
                 n_conf,  # numConformers
                 n_conf * 2,  # numChildren
                 kwargs.get('mutability', 5),  # mutability
                 kwargs.get('convergence', 5))  # convergence
        cs.Search()
        cs.GetConformers(mol_clone.OBMol)
    elif method == 'confab':
        # The keyword used to be documented as ``nconf`` while only
        # ``nconfs`` was read; honour both so neither is silently ignored.
        initial_confs = kwargs.get('nconfs', kwargs.get('nconf', 10000))
        ff = pybel._forcefields['uff']
        ff.Setup(mol_clone.OBMol)
        ff.DiverseConfGen(kwargs.get('rmsd', 0.5),  # rmsd
                          initial_confs,  # nconfs (initial)
                          kwargs.get('energy_gap', 5000.0),  # energy_gap
                          False)  # verbose
        ff.GetConformers(mol_clone.OBMol)
    else:
        raise ValueError('Method %s is not implemented' % method)

    out = []
    for i in range(mol_clone.OBMol.NumConformers()):
        # Cap the output at the requested number of conformers.
        if i >= n_conf:
            break
        # Every conformer gets its own molecule copy with that conformer
        # selected as the active one.
        mol_output_clone = mol_clone.clone
        mol_output_clone.OBMol.SetConformer(i)
        out.append(mol_output_clone)
    return out
def test_check_molecule():
    """Walk through the accept/reject cases of ``check_molecule``."""

    def raises_value_error(pattern):
        # Context manager expecting a ValueError whose message matches.
        return pytest.raises(ValueError, match=pattern)

    # Something that is not a molecule is rejected.
    with raises_value_error('Molecule object'):
        check_molecule([])

    crystal_ligand = next(oddt.toolkit.readfile('sdf', xiap_crystal_ligand))
    check_molecule(crystal_ligand)

    # force protein
    receptor = next(oddt.toolkit.readfile('pdb', xiap_protein))
    with raises_value_error('marked as a protein'):
        check_molecule(receptor, force_protein=True)
    receptor.protein = True
    check_molecule(receptor, force_protein=True)

    # force coordinates
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    with raises_value_error('3D coordinates'):
        check_molecule(benzene, force_coords=True)
    benzene.make3D()
    check_molecule(benzene, force_coords=True)
    # with pytest.raises(ValueError, match='positional'):
    #     check_molecule(benzene, True)

    # NOTE(review): the mol block below is reproduced exactly as it appears
    # in this view; confirm its line breaks against the upstream fixture.
    atomless_sdf = '''mol_title handmade 0 0 0 0 0 0 0 0 0 0999 V2000 M END '''
    atomless = oddt.toolkit.readstring('sdf', atomless_sdf)
    with raises_value_error('has zero atoms'):
        check_molecule(atomless, non_zero_atoms=True)
def test_check_molecule():
    """Check that ``check_molecule`` accepts valid input and rejects the rest."""

    def rejected(message):
        # Expect a ValueError whose text matches ``message``.
        return pytest.raises(ValueError, match=message)

    # Plain Python objects are not molecules.
    not_a_molecule = []
    with rejected('Molecule object'):
        check_molecule(not_a_molecule)

    ligand_reader = oddt.toolkit.readfile('sdf', xiap_crystal_ligand)
    xiap_ligand = next(ligand_reader)
    check_molecule(xiap_ligand)

    # force protein
    protein_reader = oddt.toolkit.readfile('pdb', xiap_protein)
    xiap = next(protein_reader)
    with rejected('marked as a protein'):
        check_molecule(xiap, force_protein=True)
    xiap.protein = True
    check_molecule(xiap, force_protein=True)

    # force coordinates
    flat_mol = oddt.toolkit.readstring('smi', 'c1ccccc1')
    with rejected('3D coordinates'):
        check_molecule(flat_mol, force_coords=True)
    flat_mol.make3D()
    check_molecule(flat_mol, force_coords=True)
    # with pytest.raises(ValueError, match='positional'):
    #     check_molecule(flat_mol, True)

    # NOTE(review): the mol block below is reproduced exactly as it appears
    # in this view; confirm its line breaks against the upstream fixture.
    empty_mol = oddt.toolkit.readstring(
        'sdf',
        '''mol_title handmade 0 0 0 0 0 0 0 0 0 0999 V2000 M END ''')
    with rejected('has zero atoms'):
        check_molecule(empty_mol, non_zero_atoms=True)
def dock(self, ligands, protein=None):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock. A single molecule is also accepted.

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : list of oddt.toolkit.Molecule objects
        One clone of the source ligand per docked pose, carrying the docked
        coordinates. Scores are stored in mol.data, together with
        ``vina_rmsd_input`` and ``vina_rmsd_input_min`` when the RMSD to the
        input pose could be computed.

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If the Vina executable exits with an error and
        ``self.skip_bad_mols`` is false.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # The temporary directory must be removed on every exit path, including
    # a failed molecule check or a Vina failure that gets re-raised.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            # Drops the last six characters of the input path (presumably
            # the '.pdbqt' suffix) to build the output file name.
            ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
            try:
                scores = parse_vina_docking_output(
                    subprocess.check_output([
                        self.executable,
                        '--receptor', self.protein_file,
                        '--ligand', ligand_file,
                        '--out', ligand_outfile
                    ] + self.params + ['--cpu', str(self.n_cpu)],
                        stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # Drop undecodable bytes: the output is diagnostic only, and
                # a decode error here would hide the actual Vina failure.
                sys.stderr.write(e.output.decode('ascii', 'ignore'))
                if self.skip_bad_mols:
                    continue  # TODO: print some warning message
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))

            # docked conformations may have wrong connectivity - use source
            # ligand
            if is_openbabel_molecule(ligand):
                if oddt.toolkits.ob.__version__ >= '2.4.0':
                    # find the order of PDBQT atoms assigned by OpenBabel
                    with open(ligand_file) as f:
                        write_order = [
                            int(line[7:12].strip())
                            for line in f
                            if line[:4] == 'ATOM'
                        ]
                    new_order = sorted(range(len(write_order)),
                                       key=write_order.__getitem__)
                    # OBMol has 1 based idx
                    new_order = [i + 1 for i in new_order]

                    assert len(new_order) == len(ligand.atoms)
                else:
                    # Openbabel 2.3.2 does not support preserving atom order.
                    # We read back the PDBQT ligand to get "correct" bonding.
                    ligand = next(oddt.toolkit.readfile('pdbqt', ligand_file))
                    if 'REMARK' in ligand.data:
                        del ligand.data['REMARK']

            docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
            for docked_ligand, score in zip(docked_ligands, scores):
                # Renumber atoms to match the input ligand
                if (is_openbabel_molecule(docked_ligand) and
                        oddt.toolkits.ob.__version__ >= '2.4.0'):
                    docked_ligand.OBMol.RenumberAtoms(new_order)
                # HACK: copy docked coordinates onto source ligand
                # We assume that the order of atoms match between ligands
                clone = ligand.clone
                clone.clone_coords(docked_ligand)
                clone.data.update(score)

                # Calculate RMSD to the input pose. This is best-effort: a
                # failure is ignored so the docked pose is still returned,
                # just without the RMSD fields that could not be computed.
                try:
                    clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                    clone.data['vina_rmsd_input_min'] = rmsd(
                        ligand, clone, method='min_symmetry')
                except Exception:
                    pass
                output_array.append(clone)
    finally:
        rmtree(ligand_dir)
    return output_array
def dock(self, ligands, protein=None):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock. A single molecule is also accepted.

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : list of oddt.toolkit.Molecule objects
        One clone of the source ligand per docked pose, carrying the docked
        coordinates. Scores are stored in mol.data, together with
        ``vina_rmsd_input`` and ``vina_rmsd_input_min`` (RMSD to the input
        pose).

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If the Vina executable exits with an error and
        ``self.skip_bad_mols`` is false.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # Guarantee removal of the temporary directory even when a molecule
    # check, the RMSD calculation or a re-raised Vina error aborts the loop.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            # Drops the last six characters of the input path (presumably
            # the '.pdbqt' suffix) to build the output file name.
            ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
            try:
                scores = parse_vina_docking_output(
                    subprocess.check_output([self.executable,
                                             '--receptor', self.protein_file,
                                             '--ligand', ligand_file,
                                             '--out', ligand_outfile] +
                                            self.params +
                                            ['--cpu', str(self.n_cpu)],
                                            stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # The captured output is only echoed for diagnostics, so
                # undecodable bytes are dropped rather than allowed to raise
                # and mask the Vina failure.
                sys.stderr.write(e.output.decode('ascii', 'ignore'))
                if self.skip_bad_mols:
                    continue  # TODO: print some warning message
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))

            # docked conformations may have wrong connectivity - use source
            # ligand
            if is_openbabel_molecule(ligand):
                if oddt.toolkits.ob.__version__ >= '2.4.0':
                    # find the order of PDBQT atoms assigned by OpenBabel
                    with open(ligand_file) as f:
                        write_order = [int(line[7:12].strip())
                                       for line in f
                                       if line[:4] == 'ATOM']
                    new_order = sorted(range(len(write_order)),
                                       key=write_order.__getitem__)
                    # OBMol has 1 based idx
                    new_order = [i + 1 for i in new_order]

                    assert len(new_order) == len(ligand.atoms)
                else:
                    # Openbabel 2.3.2 does not support preserving atom order.
                    # We read back the PDBQT ligand to get "correct" bonding.
                    ligand = next(oddt.toolkit.readfile('pdbqt', ligand_file))
                    if 'REMARK' in ligand.data:
                        del ligand.data['REMARK']

            docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
            for docked_ligand, score in zip(docked_ligands, scores):
                # Renumber atoms to match the input ligand
                if (is_openbabel_molecule(docked_ligand) and
                        oddt.toolkits.ob.__version__ >= '2.4.0'):
                    docked_ligand.OBMol.RenumberAtoms(new_order)
                # HACK: copy docked coordinates onto source ligand
                # We assume that the order of atoms match between ligands
                clone = ligand.clone
                clone.clone_coords(docked_ligand)
                clone.data.update(score)

                # Calculate RMSD to the input pose
                clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                clone.data['vina_rmsd_input_min'] = rmsd(
                    ligand, clone, method='min_symmetry')
                output_array.append(clone)
    finally:
        rmtree(ligand_dir)
    return output_array