Beispiel #1
0
    def score(self, ligands, protein=None):
        """Automated scoring procedure.

        Every ligand is written to a temporary PDBQT file and scored by
        running the Vina executable with ``--score_only``.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to score. A single molecule is accepted as well and is
            wrapped into a one-element list.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default
            one is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            List of the scored input ligands (scores are stored in
            mol.data). Ligands for which Vina failed are left out when
            ``self.skip_bad_mols`` is truthy.

        Raises
        ------
        IOError
            If ``self.protein_file`` is not set.
        Exception
            If the Vina process exits with an error and
            ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally: the scratch directory must be removed even when a
        # ligand fails validation or Vina aborts the whole run.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                cmd = [self.executable, '--score_only',
                       '--receptor', self.protein_file,
                       '--ligand', ligand_file] + self.params
                try:
                    scores = parse_vina_scoring_output(
                        subprocess.check_output(cmd,
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps a stray non-ASCII byte in Vina's output
                    # from hiding the real failure behind a decode error.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue
                    raise Exception('Autodock Vina failed. Command: "%s"' %
                                    ' '.join(e.cmd))
                ligand.data.update(scores)
                output_array.append(ligand)
        finally:
            rmtree(ligand_dir)
        return output_array
Beispiel #2
0
    def score(self, ligands, protein=None):
        """Automated scoring procedure.

        Every ligand is written to a temporary PDBQT file and scored by
        running the Vina executable with ``--score_only``.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to score. A single molecule is accepted as well and is
            wrapped into a one-element list.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default
            one is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            List of the scored input ligands (scores are stored in
            mol.data). Ligands for which Vina failed are left out when
            ``self.skip_bad_mols`` is truthy.

        Raises
        ------
        IOError
            If ``self.protein_file`` is not set.
        Exception
            If the Vina process exits with an error and
            ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally: the scratch directory must be removed even when a
        # ligand fails validation or Vina aborts the whole run.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                cmd = [self.executable, '--score_only',
                       '--receptor', self.protein_file,
                       '--ligand', ligand_file] + self.params
                try:
                    scores = parse_vina_scoring_output(
                        subprocess.check_output(cmd,
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps a stray non-ASCII byte in Vina's output
                    # from hiding the real failure behind a decode error.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue
                    raise Exception('Autodock Vina failed. Command: "%s"' %
                                    ' '.join(e.cmd))
                ligand.data.update(scores)
                output_array.append(ligand)
        finally:
            rmtree(ligand_dir)
        return output_array
Beispiel #3
0
def test_check_molecule():
    """Exercise each validation switch of check_molecule in turn."""
    # An empty list is not a molecule and has to be rejected
    assert_raises_regexp(ValueError, 'Molecule object', check_molecule, [])

    # The crystal ligand read from SDF passes the default checks
    sdf_reader = oddt.toolkit.readfile('sdf', xiap_crystal_ligand)
    crystal_ligand = next(sdf_reader)
    check_molecule(crystal_ligand)

    # force_protein: expected to fail until the protein flag is set
    pdb_reader = oddt.toolkit.readfile('pdb', xiap_protein)
    receptor = next(pdb_reader)
    protein_opts = {'force_protein': True}
    assert_raises_regexp(ValueError, 'marked as a protein',
                         check_molecule, receptor, **protein_opts)

    receptor.protein = True
    check_molecule(receptor, **protein_opts)

    # force_coords: expected to fail until make3D() has been called
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    coords_opts = {'force_coords': True}
    assert_raises_regexp(ValueError, '3D coordinates',
                         check_molecule, benzene, **coords_opts)
    benzene.make3D()
    check_molecule(benzene, **coords_opts)

    # Disabled check, kept for reference:
    # assert_raises_regexp(ValueError, 'positional', check_molecule, mol, True)

    # non_zero_atoms: a hand-written SDF record declaring zero atoms
    empty_sdf = '''mol_title
 handmade

  0  0  0  0  0  0  0  0  0  0999 V2000
M  END
                          '''
    atomless = oddt.toolkit.readstring('sdf', empty_sdf)
    assert_raises_regexp(ValueError, 'has zero atoms',
                         check_molecule, atomless, non_zero_atoms=True)
Beispiel #4
0
def diverse_conformers_generator(mol,
                                 n_conf=10,
                                 method='confab',
                                 seed=None,
                                 **kwargs):
    """Produce diverse conformers using current conformer as starting point.
    Returns a list. Each conformer is a copy of original molecule object.

    .. versionadded:: 0.6

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Molecule for which generating conformers

    n_conf : int (default=10)
        Target number of conformers. At most this many conformers are
        returned.

    method : string (default='confab')
        Method for generating conformers. Supported methods:
        * confab
        * ga

    seed : None or int (default=None)
        Random seed

    mutability : int (default=5)
        The inverse of probability of mutation. By default 5, which translates
        to 1/5 (20%) chance of mutation. This setting only works with genetic
        algorithm method ("ga").

    convergence : int (default=5)
        The number of generations with unchanged fitness, should the algorithm
        converge. This setting only works with genetic algorithm method ("ga").

    rmsd : float (default=0.5)
        The conformers are pruned unless their RMSD is higher than this cutoff.
        This setting only works with Confab method ("confab").

    nconfs : int (default=10000)
        The number of initial conformers to generate before energy pruning.
        The spelling ``nconf`` is accepted as an alias; ``nconfs`` takes
        precedence when both are given.
        This setting only works with Confab method ("confab").

    energy_gap : float (default=5000.)
        Energy gap from the lowest energy conformer to the highest possible.
        This setting only works with Confab method ("confab").

    Returns
    -------
    mols : list of oddt.toolkit.Molecule objects
        Molecules with diverse conformers

    Raises
    ------
    NotImplementedError
        If the OpenBabel version compares lower than '2.4.0'.
    ValueError
        If ``method`` is not supported, or if ``method='ga'`` and OpenBabel
        provides no ``OBConformerSearch``.
    """
    # NOTE(review): this is a lexicographic string comparison, so a version
    # such as '2.10.0' would sort below '2.4.0'.
    if __version__ < '2.4.0':
        raise NotImplementedError('Diverse conformer generation is not '
                                  'implemented in OpenBabel before 2.4.0.')

    check_molecule(mol, force_coords=True)
    # Work on a copy so the caller's molecule is left untouched
    mol_clone = mol.clone
    if seed is not None:
        rand = ob.OBRandom(True)
        rand.Seed(seed)
    if method == 'ga':
        if not hasattr(ob, 'OBConformerSearch'):
            raise ValueError('OpenBabel needs to be compiled with eigen to '
                             'perform conformer search.')
        cs = ob.OBConformerSearch()
        cs.Setup(
            mol_clone.OBMol,
            n_conf,  # numConformers
            n_conf * 2,  # numChildren
            kwargs.get('mutability', 5),  # mutability
            kwargs.get('convergence', 5))  # convergence
        cs.Search()
        cs.GetConformers(mol_clone.OBMol)
    elif method == 'confab':
        # The docstring used to advertise `nconf` while only `nconfs` was
        # read, so the documented option was silently ignored. Accept both.
        initial_confs = kwargs.get('nconfs', kwargs.get('nconf', 10000))
        ff = pybel._forcefields['uff']
        ff.Setup(mol_clone.OBMol)
        ff.DiverseConfGen(
            kwargs.get('rmsd', 0.5),  # rmsd
            initial_confs,  # nconfs (initial)
            kwargs.get('energy_gap', 5000.0),  # energy_gap
            False)  # verbose
        ff.GetConformers(mol_clone.OBMol)
    else:
        raise ValueError('Method %s is not implemented' % method)

    # Emit one independent molecule copy per conformer, capped at n_conf
    out = []
    for i in range(mol_clone.OBMol.NumConformers()):
        if i >= n_conf:
            break
        mol_output_clone = mol_clone.clone
        mol_output_clone.OBMol.SetConformer(i)
        out.append(mol_output_clone)
    return out
Beispiel #5
0
def test_check_molecule():
    """Exercise each validation switch of check_molecule in turn."""
    # An empty list is not a molecule and has to be rejected
    not_a_molecule = []
    with pytest.raises(ValueError, match='Molecule object'):
        check_molecule(not_a_molecule)

    # The crystal ligand read from SDF passes the default checks
    sdf_reader = oddt.toolkit.readfile('sdf', xiap_crystal_ligand)
    crystal_ligand = next(sdf_reader)
    check_molecule(crystal_ligand)

    # force_protein: expected to fail until the protein flag is set
    pdb_reader = oddt.toolkit.readfile('pdb', xiap_protein)
    receptor = next(pdb_reader)
    with pytest.raises(ValueError, match='marked as a protein'):
        check_molecule(receptor, force_protein=True)

    receptor.protein = True
    check_molecule(receptor, force_protein=True)

    # force_coords: expected to fail until make3D() has been called
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    with pytest.raises(ValueError, match='3D coordinates'):
        check_molecule(benzene, force_coords=True)

    benzene.make3D()
    check_molecule(benzene, force_coords=True)

    # Disabled check, kept for reference:
    # with pytest.raises(ValueError, match='positional'):
    #     check_molecule(mol, True)

    # non_zero_atoms: a hand-written SDF record declaring zero atoms
    empty_sdf = '''mol_title
 handmade

  0  0  0  0  0  0  0  0  0  0999 V2000
M  END
                          '''
    atomless = oddt.toolkit.readstring('sdf', empty_sdf)
    with pytest.raises(ValueError, match='has zero atoms'):
        check_molecule(atomless, non_zero_atoms=True)
Beispiel #6
0
def test_check_molecule():
    """Walk through the individual checks performed by check_molecule."""
    # A plain empty list is not a molecule object
    bogus_input = []
    with pytest.raises(ValueError, match='Molecule object'):
        check_molecule(bogus_input)

    # Default checks accept the crystal ligand loaded from SDF
    ligand_iter = oddt.toolkit.readfile('sdf', xiap_crystal_ligand)
    xtal_ligand = next(ligand_iter)
    check_molecule(xtal_ligand)

    # Protein enforcement: rejected first, accepted once flagged
    protein_iter = oddt.toolkit.readfile('pdb', xiap_protein)
    target = next(protein_iter)
    with pytest.raises(ValueError, match='marked as a protein'):
        check_molecule(target, force_protein=True)

    target.protein = True
    check_molecule(target, force_protein=True)

    # Coordinate enforcement: rejected first, accepted after make3D()
    ring = oddt.toolkit.readstring('smi', 'c1ccccc1')
    with pytest.raises(ValueError, match='3D coordinates'):
        check_molecule(ring, force_coords=True)

    ring.make3D()
    check_molecule(ring, force_coords=True)

    # Disabled check, kept for reference:
    # with pytest.raises(ValueError, match='positional'):
    #     check_molecule(mol, True)

    # Atom-count enforcement on a hand-written SDF declaring zero atoms
    zero_atom_block = '''mol_title
 handmade

  0  0  0  0  0  0  0  0  0  0999 V2000
M  END
                          '''
    hollow = oddt.toolkit.readstring('sdf', zero_atom_block)
    with pytest.raises(ValueError, match='has zero atoms'):
        check_molecule(hollow, non_zero_atoms=True)
Beispiel #7
0
    def dock(self, ligands, protein=None):
        """Automated docking procedure.

        Every ligand is written to a temporary PDBQT file and docked with the
        Vina executable. The docked coordinates are then copied onto clones
        of the source ligand.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to dock. A single molecule is accepted as well and is
            wrapped into a one-element list.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default one
            is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            List of docked poses, one clone per pose (scores are stored in
            mol.data). Ligands for which Vina failed are left out when
            ``self.skip_bad_mols`` is truthy.

        Raises
        ------
        IOError
            If ``self.protein_file`` is not set.
        Exception
            If the Vina process exits with an error and
            ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally: the scratch directory must be removed even when a
        # ligand fails validation or Vina aborts the whole run.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                # drop the last 6 characters (presumably the '.pdbqt'
                # extension) to derive the output file name
                ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
                cmd = ([self.executable, '--receptor', self.protein_file,
                        '--ligand', ligand_file, '--out', ligand_outfile] +
                       self.params + ['--cpu', str(self.n_cpu)])
                try:
                    scores = parse_vina_docking_output(
                        subprocess.check_output(cmd,
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps a stray non-ASCII byte in Vina's output
                    # from hiding the real failure behind a decode error.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue  # TODO: print some warning message
                    raise Exception('Autodock Vina failed. Command: "%s"' %
                                    ' '.join(e.cmd))

                # docked conformations may have wrong connectivity - use
                # source ligand
                if is_openbabel_molecule(ligand):
                    if oddt.toolkits.ob.__version__ >= '2.4.0':
                        # find the order of PDBQT atoms assigned by OpenBabel
                        with open(ligand_file) as f:
                            write_order = [
                                int(line[7:12].strip()) for line in f
                                if line[:4] == 'ATOM'
                            ]
                        new_order = sorted(range(len(write_order)),
                                           key=write_order.__getitem__)
                        # OBMol has 1 based idx
                        new_order = [i + 1 for i in new_order]

                        assert len(new_order) == len(ligand.atoms)
                    else:
                        # Openbabel 2.3.2 does not support preserving atom
                        # order. We read back the PDBQT ligand to get
                        # "correct" bonding.
                        ligand = next(oddt.toolkit.readfile('pdbqt',
                                                            ligand_file))
                        if 'REMARK' in ligand.data:
                            del ligand.data['REMARK']

                docked_ligands = oddt.toolkit.readfile('pdbqt',
                                                       ligand_outfile)
                for docked_ligand, score in zip(docked_ligands, scores):
                    # Renumber atoms to match the input ligand
                    if (is_openbabel_molecule(docked_ligand)
                            and oddt.toolkits.ob.__version__ >= '2.4.0'):
                        docked_ligand.OBMol.RenumberAtoms(new_order)
                    # HACK: copy docked coordinates onto source ligand
                    # We assume that the order of atoms match between ligands
                    clone = ligand.clone
                    clone.clone_coords(docked_ligand)
                    clone.data.update(score)

                    # Calculate RMSD to the input pose. Deliberately
                    # best-effort: a failure here must not discard an
                    # otherwise valid docked pose.
                    try:
                        clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                        clone.data['vina_rmsd_input_min'] = rmsd(
                            ligand, clone, method='min_symmetry')
                    except Exception:
                        pass
                    output_array.append(clone)
        finally:
            rmtree(ligand_dir)
        return output_array
Beispiel #8
0
    def dock(self, ligands, protein=None):
        """Automated docking procedure.

        Every ligand is written to a temporary PDBQT file and docked with the
        Vina executable. The docked coordinates are then copied onto clones
        of the source ligand.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to dock. A single molecule is accepted as well and is
            wrapped into a one-element list.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default one
            is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            List of docked poses, one clone per pose (scores are stored in
            mol.data). Ligands for which Vina failed are left out when
            ``self.skip_bad_mols`` is truthy.

        Raises
        ------
        IOError
            If ``self.protein_file`` is not set.
        Exception
            If the Vina process exits with an error and
            ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally: the scratch directory must be removed even when a
        # ligand fails validation, Vina aborts, or the RMSD step raises.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                # drop the last 6 characters (presumably the '.pdbqt'
                # extension) to derive the output file name
                ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
                cmd = ([self.executable, '--receptor', self.protein_file,
                        '--ligand', ligand_file, '--out', ligand_outfile] +
                       self.params + ['--cpu', str(self.n_cpu)])
                try:
                    scores = parse_vina_docking_output(
                        subprocess.check_output(cmd,
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps a stray non-ASCII byte in Vina's output
                    # from hiding the real failure behind a decode error.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue  # TODO: print some warning message
                    raise Exception('Autodock Vina failed. Command: "%s"' %
                                    ' '.join(e.cmd))

                # docked conformations may have wrong connectivity - use
                # source ligand
                if is_openbabel_molecule(ligand):
                    if oddt.toolkits.ob.__version__ >= '2.4.0':
                        # find the order of PDBQT atoms assigned by OpenBabel
                        with open(ligand_file) as f:
                            write_order = [int(line[7:12].strip())
                                           for line in f
                                           if line[:4] == 'ATOM']
                        new_order = sorted(range(len(write_order)),
                                           key=write_order.__getitem__)
                        # OBMol has 1 based idx
                        new_order = [i + 1 for i in new_order]

                        assert len(new_order) == len(ligand.atoms)
                    else:
                        # Openbabel 2.3.2 does not support preserving atom
                        # order. We read back the PDBQT ligand to get
                        # "correct" bonding.
                        ligand = next(oddt.toolkit.readfile('pdbqt',
                                                            ligand_file))
                        if 'REMARK' in ligand.data:
                            del ligand.data['REMARK']

                docked_ligands = oddt.toolkit.readfile('pdbqt',
                                                       ligand_outfile)
                for docked_ligand, score in zip(docked_ligands, scores):
                    # Renumber atoms to match the input ligand
                    if (is_openbabel_molecule(docked_ligand) and
                            oddt.toolkits.ob.__version__ >= '2.4.0'):
                        docked_ligand.OBMol.RenumberAtoms(new_order)
                    # HACK: copy docked coordinates onto source ligand
                    # We assume that the order of atoms match between ligands
                    clone = ligand.clone
                    clone.clone_coords(docked_ligand)
                    clone.data.update(score)

                    # Calculate RMSD to the input pose
                    clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                    clone.data['vina_rmsd_input_min'] = rmsd(
                        ligand, clone, method='min_symmetry')
                    output_array.append(clone)
        finally:
            rmtree(ligand_dir)
        return output_array