Ejemplo n.º 1
0
    def build_bfactor(self, ligands, protein, protein_pdb):
        """Build close-contact counts followed by the protein's B-factors.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            A list or iterable of ligands to build the descriptor or a
            single molecule.

        protein: oddt.toolkit.Molecule or None
            Protein to use as reference. A truthy value replaces
            ``self.protein``; otherwise the stored protein is used.

        protein_pdb:
            Passed unchanged as the second argument of
            ``PDBParser.get_structure`` (presumably the path to the
            protein's PDB file - confirm against callers).

        Returns
        -------
        numpy.ndarray
            ``np.vstack`` of the descriptor rows. When the parsed structure
            yields at least one atom, only the FIRST ligand's contact counts
            are kept and the B-factors of all atoms are appended to that
            row; rows computed for further ligands are discarded.
        """
        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]
        out = []
        for mol in ligands:
            mol_dict = atoms_by_type(mol.atom_dict, self.ligand_types,
                                     self.mode)
            if self.aligned_pairs:
                # i-th ligand type is paired with the i-th protein type only
                pairs = zip(self.ligand_types, self.protein_types)
            else:
                # every ligand type against every protein type
                pairs = [(mol_type, prot_type)
                         for mol_type in self.ligand_types
                         for prot_type in self.protein_types]

            # Keep only protein atoms that lie within the largest cutoff of
            # at least one ligand atom before splitting them by type.
            dist = distance(self.protein.atom_dict['coords'],
                            mol.atom_dict['coords'])
            within_cutoff = (dist <= self.cutoff.max()).any(axis=1)
            local_protein_dict = self.protein.atom_dict[within_cutoff]

            prot_dict = atoms_by_type(local_protein_dict, self.protein_types,
                                      self.mode)
            desc = []
            for mol_type, prot_type in pairs:
                d = distance(prot_dict[prot_type]['coords'],
                             mol_dict[mol_type]['coords'])[..., np.newaxis]
                if len(self.cutoff) > 1:
                    # Binned mode: column 0 is the exclusive lower bound and
                    # column 1 the inclusive upper bound of each bin.
                    count = ((d > self.cutoff[..., 0]) &
                             (d <= self.cutoff[..., 1])).sum(axis=(0, 1))
                else:
                    count = (d <= self.cutoff).sum()
                desc.append(count)
            desc = np.array(desc, dtype=int).flatten()
            out.append(desc)

        structure = PDBParser().get_structure('pdb', protein_pdb)

        # Collect every B-factor first and append them in one call; growing
        # the row atom by atom re-copies the whole array on each iteration.
        bfactors = [atom.get_bfactor() for atom in structure.get_atoms()]
        if bfactors:
            out = [np.append(out[0], np.array(bfactors))]

        return np.vstack(out)
Ejemplo n.º 2
0
    def build(self, ligands, protein=None):
        """Compute one descriptor row per ligand with ``self.func``.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            Ligands to describe; a single molecule is wrapped in a list.

        protein: oddt.toolkit.Molecule or None (default=None)
            When truthy, stored as ``self.protein`` before descriptors are
            computed.

        Returns
        -------
        Stacked descriptors: a CSR sparse matrix when ``self.sparse`` is
        set, otherwise the result of ``np.vstack``.
        """
        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]

        # The protein is passed on only when one is available.
        rows = [
            self.func(mol) if self.protein is None
            else self.func(mol, protein=self.protein)
            for mol in ligands
        ]

        if not self.sparse:
            return np.vstack(rows)

        to_csr = partial(sparse_to_csr_matrix, size=self.shape)
        return sparse_vstack(map(to_csr, rows), format='csr')
Ejemplo n.º 3
0
    def similarity(self, method, query, cutoff=0.9, protein=None):
        """Append a similarity filter to the pipeline.

        Parameters
        ----------
        method: string
            Similarity method (matched case-insensitively). Accepted values:
            * `ifp` - interaction fingerprint (uses `protein`)
            * `sifp` - simple interaction fingerprint (uses `protein`)
            * `usr` - Ultrafast Shape Recognition
            * `usr_cat` - USR with CREDO atom types
            * `electroshape` - Electroshape, USR with moments representing
              partial charge

        query: oddt.toolkit.Molecule or list of oddt.toolkit.Molecule
            Query molecules to compare the pipeline to. A single molecule
            is wrapped in a list.

        cutoff: float
            Similarity cutoff handed to the filter. Any similarity lower
            than it will be filtered out.

        protein: oddt.toolkit.Molecule (default = None)
            Protein forwarded to the `ifp` / `sifp` generators; ignored by
            the shape-based methods.

        Raises
        ------
        ValueError
            If `method` is not one of the accepted values.
        """
        if is_molecule(query):
            query = [query]

        method_key = method.lower()
        fingerprint_classes = {
            'ifp': InteractionFingerprint,
            'sifp': SimpleInteractionFingerprint,
        }
        shape_generators = {
            'usr': usr,
            'usr_cat': usr_cat,
            'electroshape': electroshape,
        }

        # Pick the descriptor generator together with its similarity measure.
        if method_key in fingerprint_classes:
            gen = partial(fingerprint_classes[method_key], protein=protein)
            dist = dice
        elif method_key in shape_generators:
            gen = shape_generators[method_key]
            dist = usr_similarity
        else:
            raise ValueError('Similarity filter "%s" is not supported.' % method)

        # Query descriptors are computed once, up front; the same generator
        # is handed to the filter for the pipeline molecules.
        query_fps = [gen(q) for q in query]
        similarity_filter = partial(_filter_similarity,
                                    distance=dist,
                                    generator=gen,
                                    query_fps=query_fps,
                                    cutoff=cutoff)
        self._pipe.append(similarity_filter)
Ejemplo n.º 4
0
    def similarity(self, method, query, cutoff=0.9, protein=None):
        """Register a similarity filter as the next pipeline step.

        Parameters
        ----------
        method: string
            Name of the similarity method, compared in lower case:
            * `ifp` - interaction fingerprint (uses `protein`)
            * `sifp` - simple interaction fingerprint (uses `protein`)
            * `usr` - Ultrafast Shape Recognition
            * `usr_cat` - USR with CREDO atom types
            * `electroshape` - Electroshape, USR with moments representing
              partial charge

        query: oddt.toolkit.Molecule or list of oddt.toolkit.Molecule
            Query molecules to compare the pipeline to; a lone molecule is
            turned into a one-element list.

        cutoff: float
            Similarity cutoff for filtering molecules. Any similarity lower
            than it will be filtered out.

        protein: oddt.toolkit.Molecule (default = None)
            Protein bound into the `ifp` / `sifp` fingerprint generators.
            Not used by `usr`, `usr_cat` or `electroshape`.

        Raises
        ------
        ValueError
            When `method` names an unsupported filter.
        """
        if is_molecule(query):
            query = [query]

        requested = method.lower()
        interaction_fps = {
            'ifp': InteractionFingerprint,
            'sifp': SimpleInteractionFingerprint,
        }
        shape_methods = {
            'usr': usr,
            'usr_cat': usr_cat,
            'electroshape': electroshape,
        }

        if requested in interaction_fps:
            # interaction fingerprints are compared with the Dice measure
            gen = partial(interaction_fps[requested], protein=protein)
            dist = dice
        elif requested in shape_methods:
            # shape descriptors share the USR similarity measure
            gen = shape_methods[requested]
            dist = usr_similarity
        else:
            raise ValueError('Similarity filter "%s" is not supported.' % method)

        # Descriptors of the query molecules are generated only once here.
        query_fps = [gen(q) for q in query]
        step = partial(_filter_similarity,
                       distance=dist,
                       generator=gen,  # reused for the pipeline molecules
                       query_fps=query_fps,
                       cutoff=cutoff)
        self._pipe.append(step)
Ejemplo n.º 5
0
 def build(self, mols):
     """Stack the fingerprint of every molecule into one array.

     ``mols`` may be a single molecule or an iterable of molecules; each
     one is passed to ``self._get_fingerprint`` and the results are
     combined with ``np.vstack``.
     """
     molecules = [mols] if is_molecule(mols) else mols
     return np.vstack([self._get_fingerprint(m) for m in molecules])
Ejemplo n.º 6
0
    def build_num_aromat_rings(self, ligands, protein, ligand_sdf):
        """Build close-contact counts followed by aromatic-ring counts.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            A list or iterable of ligands to build the descriptor or a
            single molecule.

        protein: oddt.toolkit.Molecule or None
            When truthy, stored as ``self.protein`` and used as reference.

        ligand_sdf: the path to the sdf-file of the ligand.

        Returns
        -------
        numpy.ndarray
            ``np.vstack`` of the descriptor rows. For every molecule read
            from ``ligand_sdf`` the row list is collapsed to its first row
            with that molecule's aromatic-ring count appended.
        """
        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]

        def contacts_in_range(pair_dists):
            # Binned mode uses (lower, upper] per bin; otherwise a single
            # inclusive threshold is applied.
            if len(self.cutoff) > 1:
                lower = self.cutoff[..., 0]
                upper = self.cutoff[..., 1]
                return ((pair_dists > lower) &
                        (pair_dists <= upper)).sum(axis=(0, 1))
            return (pair_dists <= self.cutoff).sum()

        rows = []
        for ligand in ligands:
            ligand_atoms = atoms_by_type(ligand.atom_dict, self.ligand_types,
                                         self.mode)
            if self.aligned_pairs:
                type_pairs = zip(self.ligand_types, self.protein_types)
            else:
                type_pairs = [(lig_type, prot_type)
                              for lig_type in self.ligand_types
                              for prot_type in self.protein_types]

            # Only protein atoms within the largest cutoff of some ligand
            # atom take part in the per-type counting.
            to_ligand = distance(self.protein.atom_dict['coords'],
                                 ligand.atom_dict['coords'])
            nearby = (to_ligand <= self.cutoff.max()).any(axis=1)
            pocket_atoms = atoms_by_type(self.protein.atom_dict[nearby],
                                         self.protein_types, self.mode)

            counts = [
                contacts_in_range(
                    distance(pocket_atoms[prot_type]['coords'],
                             ligand_atoms[lig_type]['coords'])[..., np.newaxis])
                for lig_type, prot_type in type_pairs
            ]
            rows.append(np.array(counts, dtype=int).flatten())

        for sdf_mol in pybel.readfile("sdf", ligand_sdf):
            n_aromatic = sum(1 for ring in sdf_mol.OBMol.GetSSSR()
                             if ring.IsAromatic())
            rows = [np.append(rows[0], np.array(n_aromatic))]

        return np.vstack(rows)
Ejemplo n.º 7
0
    def build(self, ligands, protein=None):
        """Count protein-ligand atom-type contacts for each ligand.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            A list or iterable of ligands to build the descriptor or a
            single molecule.

        protein: oddt.toolkit.Molecule or None (default=None)
            When truthy, stored as ``self.protein`` and used as reference.

        Returns
        -------
        numpy.ndarray
            One flattened row of contact counts per ligand, stacked with
            ``np.vstack``.
        """
        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]

        descriptors = []
        for ligand in ligands:
            lig_by_type = atoms_by_type(ligand.atom_dict, self.ligand_types,
                                        self.mode)

            if self.aligned_pairs:
                # i-th ligand type against i-th protein type
                type_pairs = zip(self.ligand_types, self.protein_types)
            else:
                # full cross product of ligand and protein types
                type_pairs = [(lig_type, prot_type)
                              for lig_type in self.ligand_types
                              for prot_type in self.protein_types]

            # Pre-filter the protein to atoms within the largest cutoff of
            # at least one ligand atom.
            all_dists = distance(self.protein.atom_dict['coords'],
                                 ligand.atom_dict['coords'])
            close_mask = (all_dists <= self.cutoff.max()).any(axis=1)
            prot_by_type = atoms_by_type(self.protein.atom_dict[close_mask],
                                         self.protein_types, self.mode)

            binned = len(self.cutoff) > 1
            counts = []
            for lig_type, prot_type in type_pairs:
                pair_dists = distance(
                    prot_by_type[prot_type]['coords'],
                    lig_by_type[lig_type]['coords'])[..., np.newaxis]
                if binned:
                    above_lower = pair_dists > self.cutoff[..., 0]
                    within_upper = pair_dists <= self.cutoff[..., 1]
                    counts.append((above_lower & within_upper).sum(axis=(0, 1)))
                else:
                    counts.append((pair_dists <= self.cutoff).sum())

            descriptors.append(np.array(counts, dtype=int).flatten())
        return np.vstack(descriptors)
Ejemplo n.º 8
0
    def score(self, ligands, protein=None):
        """Automated scoring procedure.

        Each ligand is written as PDBQT into a temporary directory and
        scored by running ``self.executable`` with ``--score_only``.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to score. A single molecule is also accepted.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default
            one is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            Array of ligands (scores are stored in mol.data method)

        Raises
        ------
        IOError
            If no receptor file is set.
        Exception
            If the Vina process fails and ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally so the temporary directory is removed even when a
        # ligand check or the Vina call raises.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                try:
                    scores = parse_vina_scoring_output(
                        subprocess.check_output(
                            [self.executable, '--score_only',
                             '--receptor', self.protein_file,
                             '--ligand', ligand_file] + self.params,
                            stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps non-ASCII bytes in the tool output from
                    # raising UnicodeDecodeError and hiding the real failure.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue
                    else:
                        raise Exception('Autodock Vina failed. Command: "%s"' %
                                        ' '.join(e.cmd))
                ligand.data.update(scores)
                output_array.append(ligand)
        finally:
            rmtree(ligand_dir)
        return output_array
Ejemplo n.º 9
0
    def score(self, ligands, protein=None):
        """Automated scoring procedure.

        Every ligand is dumped to a PDBQT file inside a temporary directory
        and passed to ``self.executable`` with the ``--score_only`` flag.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to score. A single molecule is also accepted.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default
            one is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            Array of ligands (scores are stored in mol.data method)

        Raises
        ------
        IOError
            If no receptor file is set.
        Exception
            If the Vina process fails and ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # Guarantee removal of the temporary directory on every exit path,
        # including exceptions raised while scoring.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                command = [self.executable, '--score_only',
                           '--receptor', self.protein_file,
                           '--ligand', ligand_file] + self.params
                try:
                    scores = parse_vina_scoring_output(
                        subprocess.check_output(command,
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # Lenient decoding: a non-ASCII byte in the captured
                    # output must not mask the original Vina failure.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue
                    else:
                        raise Exception('Autodock Vina failed. Command: "%s"' %
                                        ' '.join(e.cmd))
                ligand.data.update(scores)
                output_array.append(ligand)
        finally:
            rmtree(ligand_dir)
        return output_array
Ejemplo n.º 10
0
 def build(self, ligands, protein=None):
     """Build a matrix of Vina score terms, one row per ligand.

     Parameters
     ----------
     ligands: iterable of molecules or a single molecule
         Ligands to score with ``self.vina.score``.

     protein: molecule or None (default=None)
         When truthy, passed to ``self.set_protein`` before scoring.

     Returns
     -------
     numpy.ndarray
         2-D float32 array holding, for each ligand, the values of the
         keys listed in ``self.vina_scores``. For an empty ``ligands``
         the historical result ``np.atleast_2d(None)`` is returned.
     """
     if protein:
         self.set_protein(protein)
     if is_molecule(ligands):
         ligands = [ligands]
     rows = []
     for mol in ligands:
         # Vina
         # TODO: Asynchronous output from vina, push command to score and retrieve at the end?
         # TODO: Check if ligand has vina scores
         scored_mol = self.vina.score(mol)[0].data
         rows.append(np.array([scored_mol[key] for key in self.vina_scores],
                              dtype=np.float32).flatten())
     if not rows:
         # Kept for backward compatibility with the previous behaviour.
         return np.atleast_2d(None)
     # Stack once at the end instead of re-copying the matrix per ligand.
     return np.vstack(rows)
Ejemplo n.º 11
0
 def build(self, ligands, protein=None):
     """Build a matrix of Vina score terms, one row per ligand.

     Molecules whose ``data`` already holds every key in
     ``self.vina_scores`` reuse those values; all others are scored
     through ``self.vina`` and the results are written back to
     ``mol.data``.

     Parameters
     ----------
     ligands: iterable of molecules or a single molecule
         Ligands to describe.

     protein: molecule or None (default=None)
         When truthy, passed to ``self.set_protein`` before scoring.

     Returns
     -------
     numpy.ndarray
         2-D float32 array with one row per ligand. For an empty
         ``ligands`` the historical result ``np.atleast_2d(None)`` is
         returned.
     """
     if protein:
         self.set_protein(protein)
     if is_molecule(ligands):
         ligands = [ligands]
     rows = []
     for mol in ligands:
         # Reset per molecule so the diagnostic print below never hits an
         # unbound name or shows values left over from a previous ligand.
         affinity = inter = intra = num_rotors = None
         mol_keys = mol.data.keys()
         if any(x not in mol_keys for x in self.vina_scores):
             self.vina.set_ligand(mol)
             inter = self.vina.score_inter()
             intra = self.vina.score_intra()
             num_rotors = self.vina.num_rotors
             # could use self.vina.score(), but better to reuse variables
             affinity = ((inter * self.vina.weights[:5]).sum() /
                         (1 + self.vina.weights[5] * num_rotors))
             assert len(self.all_vina_scores) == len(inter) + len(intra) + 2
             score = dict(
                 zip(
                     self.all_vina_scores,
                     np.hstack(
                         (affinity, inter, intra, num_rotors)).flatten()))
             mol.data.update(score)
         else:
             score = mol.data.to_dict()
         try:
             vec = np.array([score[s] for s in self.vina_scores],
                            dtype=np.float32).flatten()
         except Exception:
             # Dump the offending values, then re-raise the original error
             # with its traceback intact.
             print(score, affinity, inter, intra, num_rotors)
             print(mol.title)
             raise
         rows.append(vec)
     if not rows:
         # Kept for backward compatibility with the previous behaviour.
         return np.atleast_2d(None)
     # Stack once at the end instead of re-copying the matrix per ligand.
     return np.vstack(rows)
Ejemplo n.º 12
0
    def dock(self, ligands, protein=None):
        """Automated docking procedure.

        Each ligand is written as PDBQT into a temporary directory, docked
        by running ``self.executable``, and every docked pose is returned as
        a clone of the source ligand carrying the docked coordinates.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to dock. A single molecule is also accepted.

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default one
            is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            Array of ligands (scores are stored in mol.data method)

        Raises
        ------
        IOError
            If no receptor file is set.
        Exception
            If the Vina process fails and ``self.skip_bad_mols`` is falsy.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally so the temporary directory is removed even when a
        # ligand check, the Vina call or pose post-processing raises.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                # drops the last six characters (presumably the '.pdbqt'
                # extension) before adding the output suffix
                ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
                try:
                    scores = parse_vina_docking_output(
                        subprocess.check_output([
                            self.executable, '--receptor', self.protein_file,
                            '--ligand', ligand_file, '--out', ligand_outfile
                        ] + self.params + ['--cpu', str(self.n_cpu)],
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps non-ASCII bytes in the tool output from
                    # raising UnicodeDecodeError and hiding the real failure.
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue  # TODO: print some warning message
                    else:
                        raise Exception('Autodock Vina failed. Command: "%s"' %
                                        ' '.join(e.cmd))

                # docked conformations may have wrong connectivity - use source ligand
                if is_openbabel_molecule(ligand):
                    if oddt.toolkits.ob.__version__ >= '2.4.0':
                        # find the order of PDBQT atoms assigned by OpenBabel
                        with open(ligand_file) as f:
                            write_order = [
                                int(line[7:12].strip()) for line in f
                                if line[:4] == 'ATOM'
                            ]
                        new_order = sorted(range(len(write_order)),
                                           key=write_order.__getitem__)
                        new_order = [i + 1
                                     for i in new_order]  # OBMol has 1 based idx

                        assert len(new_order) == len(ligand.atoms)
                    else:
                        # Openbabel 2.3.2 does not support preserving atom order.
                        # We read back the PDBQT ligand to get "correct" bonding.
                        ligand = next(oddt.toolkit.readfile('pdbqt', ligand_file))
                        if 'REMARK' in ligand.data:
                            del ligand.data['REMARK']

                docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
                for docked_ligand, score in zip(docked_ligands, scores):
                    # Renumber atoms to match the input ligand
                    if (is_openbabel_molecule(docked_ligand)
                            and oddt.toolkits.ob.__version__ >= '2.4.0'):
                        docked_ligand.OBMol.RenumberAtoms(new_order)
                    # HACK: copy docked coordinates onto source ligand
                    # We assume that the order of atoms match between ligands
                    clone = ligand.clone
                    clone.clone_coords(docked_ligand)
                    clone.data.update(score)

                    # Calculate RMSD to the input pose
                    try:
                        clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                        clone.data['vina_rmsd_input_min'] = rmsd(
                            ligand, clone, method='min_symmetry')
                    except Exception:
                        # RMSD is best-effort: a pose is still returned when
                        # it cannot be computed.
                        pass
                    output_array.append(clone)
        finally:
            rmtree(ligand_dir)
        return output_array
Ejemplo n.º 13
0
    def build_stand_alone(self, ligands, lig_sdf, protein, prot_pdb,
                          nmbr_modes, schroedinger_path):
        """Combine contact counts with QikProp, ring, rotor and NMA features.

        The returned row holds, in order: the close-contact counts of the
        first ligand, eight QikProp properties, the number of rotatable
        bonds and the number of aromatic rings of each molecule in
        ``lig_sdf``, and the lengths of the ANM eigenvalue and eigenvector
        arrays. Used in ET-Score.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            A list or iterable of ligands to build the descriptor or a
            single molecule.

        lig_sdf: the path to the sdf-file of the ligand. It is handed to
            QikProp and read again with pybel.

        protein: oddt.toolkit.Molecule or None
            When truthy, stored as ``self.protein`` and used as reference.

        prot_pdb: passed to ``parsePDB`` to load the protein structure.

        nmbr_modes: the number of normal modes requested from
            ``anm.calcModes``.

        schroedinger_path: directory containing the ``qikprop``
            executable; a trailing "/" is added when missing.

        Side effects
        ------------
        Runs QikProp through the shell, reads ``<ligand name>.CSV`` from
        the current directory, prints progress information and appends the
        ligand name to ``./qikFail_eigv.txt`` when a QikProp value is empty.
        """
        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]

        def extended(rows, value):
            # Collapse ``rows`` to its first row with ``value`` appended.
            return [np.append(rows[0], np.array(value))]

        out = []
        for ligand in ligands:
            ligand_atoms = atoms_by_type(ligand.atom_dict, self.ligand_types,
                                         self.mode)
            if self.aligned_pairs:
                type_pairs = zip(self.ligand_types, self.protein_types)
            else:
                type_pairs = [(lig_type, prot_type)
                              for lig_type in self.ligand_types
                              for prot_type in self.protein_types]

            # Only protein atoms within the largest cutoff of some ligand
            # atom take part in the per-type counting.
            to_ligand = distance(self.protein.atom_dict['coords'],
                                 ligand.atom_dict['coords'])
            nearby = (to_ligand <= self.cutoff.max()).any(axis=1)
            pocket_atoms = atoms_by_type(self.protein.atom_dict[nearby],
                                         self.protein_types, self.mode)

            counts = []
            for lig_type, prot_type in type_pairs:
                pair_dists = distance(
                    pocket_atoms[prot_type]['coords'],
                    ligand_atoms[lig_type]['coords'])[..., np.newaxis]
                if len(self.cutoff) > 1:
                    in_bin = ((pair_dists > self.cutoff[..., 0]) &
                              (pair_dists <= self.cutoff[..., 1]))
                    counts.append(in_bin.sum(axis=(0, 1)))
                else:
                    counts.append((pair_dists <= self.cutoff).sum())
            out.append(np.array(counts, dtype=int).flatten())

        # --- QikProp -------------------------------------------------------
        if schroedinger_path[-1] != "/":
            schroedinger_path += "/"
        qikprop_command = schroedinger_path + "qikprop -NOJOBID %s" % lig_sdf
        subprocess.call(qikprop_command, shell=True)

        if "/" in lig_sdf:
            name_pattern = re.compile(r'/([^/]+)\.sdf')
            lig_name = name_pattern.search(lig_sdf).group(1)
            print(lig_name)
        else:
            lig_name = lig_sdf[:-4]  # remove .sdf

        property_names = ("FOSA", "FISA", "WPSA", "QPlogPo/w", "QPlogHERG",
                          "QPlogKhsa", "QPPMDCK", "QPlogKp")
        qikprops = {}
        with open("%s.CSV" % lig_name) as csvfile:
            # later rows overwrite earlier ones, so the last row wins
            for row in csv.DictReader(csvfile):
                for name in property_names:
                    qikprops[name] = row[name]

        # Empty values mean QikProp failed: log the ligand once and use 0.
        failure_logged = False
        for raw_value in qikprops.values():
            if raw_value == '':
                if not failure_logged:
                    with open("./qikFail_eigv.txt", "a+") as results:
                        results.write("%s \n" % lig_name)
                    failure_logged = True
                out = extended(out, 0)
            else:
                out = extended(out, float(raw_value))

        # --- rotatable bonds ----------------------------------------------
        for sdf_mol in pybel.readfile("sdf", lig_sdf):  # can be sdf or mol2
            out = extended(out, sdf_mol.OBMol.NumRotors())

        # --- aromatic rings -----------------------------------------------
        for sdf_mol in pybel.readfile("sdf", lig_sdf):  # could be sdf or mol2
            n_aromatic = sum(1 for ring in sdf_mol.OBMol.GetSSSR()
                             if ring.IsAromatic())
            out = extended(out, n_aromatic)

        print("Protein:")
        print(prot_pdb)

        # --- normal mode analysis on the C-alpha atoms ----------------------
        structure = parsePDB(prot_pdb)
        calphas = structure.select('calpha')

        anm = ANM('pdb ANM analysis')
        anm.buildHessian(calphas, cutoff=12.0)
        anm.getHessian().round(3)
        anm.calcModes(n_modes=nmbr_modes)

        out = extended(out, len(anm.getEigvals()))
        out = extended(out, len(anm.getEigvecs()))

        return np.vstack(out)
Ejemplo n.º 14
0
    def build_eigval_qik(self, ligands, protein, protein_pdb, ligand_sdf,
                         qikprop_path="/opt/schrodinger2017-4/qikprop"):
        """Combines nma_eigenvalues and qikprop-properties descriptors for a series of ligands.

        The returned row holds the close-contact counts of the first
        ligand, followed by eight QikProp properties and the eigenvalue of
        every ANM mode.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            A list or iterable of ligands to build the descriptor or a
            single molecule.

        protein: oddt.toolkit.Molecule or None
            When truthy, stored as ``self.protein`` and used as reference.

        protein_pdb: passed to ``parsePDB`` to load the protein structure.

        ligand_sdf: the path to the sdf-file of the ligand. The 11
            characters before its 4-character extension are used as the
            QikProp output name and their first 4 characters as the ligand
            name (presumably a ``<pdbid>_ligand.sdf`` layout - confirm
            against callers).

        qikprop_path: str
            Command used to launch QikProp. Defaults to the location that
            was previously hard-coded.

        Side effects
        ------------
        Runs QikProp through the shell, reads ``<name>.CSV`` from the
        current directory, deletes files in the current directory whose
        names start with the ligand name, and appends the ligand name to
        ``./qikFail_eigv.txt`` when a QikProp value is empty.
        """
        # Function-scope imports: used only for the clean-up step below.
        import glob
        import os

        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]
        out = []
        for mol in ligands:
            mol_dict = atoms_by_type(mol.atom_dict, self.ligand_types,
                                     self.mode)
            if self.aligned_pairs:
                # i-th ligand type is paired with the i-th protein type only
                pairs = zip(self.ligand_types, self.protein_types)
            else:
                # every ligand type against every protein type
                pairs = [(mol_type, prot_type)
                         for mol_type in self.ligand_types
                         for prot_type in self.protein_types]

            # Keep only protein atoms that lie within the largest cutoff of
            # at least one ligand atom before splitting them by type.
            dist = distance(self.protein.atom_dict['coords'],
                            mol.atom_dict['coords'])
            within_cutoff = (dist <= self.cutoff.max()).any(axis=1)
            local_protein_dict = self.protein.atom_dict[within_cutoff]

            prot_dict = atoms_by_type(local_protein_dict, self.protein_types,
                                      self.mode)
            desc = []
            for mol_type, prot_type in pairs:
                d = distance(prot_dict[prot_type]['coords'],
                             mol_dict[mol_type]['coords'])[..., np.newaxis]
                if len(self.cutoff) > 1:
                    # Binned mode: column 0 is the exclusive lower bound and
                    # column 1 the inclusive upper bound of each bin.
                    count = ((d > self.cutoff[..., 0]) &
                             (d <= self.cutoff[..., 1])).sum(axis=(0, 1))
                else:
                    count = (d <= self.cutoff).sum()
                desc.append(count)
            desc = np.array(desc, dtype=int).flatten()
            out.append(desc)

        lig_id = ligand_sdf[-15:-4]
        subprocess.call("%s -NOJOBID %s" % (qikprop_path, ligand_sdf),
                        shell=True)

        qikprops = {}
        with open("%s.CSV" % lig_id) as csvfile:
            reader = csv.DictReader(csvfile)
            # later rows overwrite earlier ones, so the last row wins
            for row in reader:
                qikprops["FOSA"] = row["FOSA"]
                qikprops["FISA"] = row["FISA"]
                qikprops["WPSA"] = row["WPSA"]
                qikprops["QPlogPo/w"] = row["QPlogPo/w"]
                qikprops["QPlogHERG"] = row["QPlogHERG"]
                qikprops["QPlogKhsa"] = row["QPlogKhsa"]
                qikprops["QPPMDCK"] = row["QPPMDCK"]
                qikprops["QPlogKp"] = row["QPlogKp"]

        lig_name = lig_id[:-7]
        # Remove QikProp's leftover files. Done in-process and only for a
        # non-empty prefix: the previous shell "rm <prefix>*" turned into
        # "rm *" whenever the prefix came out empty.
        if lig_name:
            for leftover in glob.glob(lig_name + "*"):
                try:
                    os.remove(leftover)
                except OSError:
                    # best-effort clean-up, as the ignored "rm" exit status was
                    pass

        # Add QikProp properties as descriptors
        fail = 0
        for prop in qikprops:
            if qikprops[prop] == '':  # QikProp has failed
                if fail == 0:
                    with open("./qikFail_eigv.txt", "a+") as results:
                        results.write("%s \n" % lig_name)
                    fail = 1
                out = [np.append(out[0], np.array(0))]
            else:
                out = [np.append(out[0], np.array(float(qikprops[prop])))]

        # Add NMA Eigenvalues
        pdb = parsePDB(protein_pdb)
        calphas = pdb.select('calpha')

        anm = ANM('pdb ANM analysis')
        anm.buildHessian(calphas, cutoff=12.0)
        anm.getHessian().round(3)
        anm.calcModes()

        # Gather all eigenvalues and append them in a single call rather
        # than re-copying the row once per mode.
        eigvals = [mode.getEigval() for mode in anm]
        if eigvals:
            out = [np.append(out[0], np.array(eigvals))]

        return np.vstack(out)
Ejemplo n.º 15
0
    def build_nmaLength(self, ligands, protein, protein_pdb, nmbr_modes):
        """Build contact-count descriptors extended with normal-mode sizes.

        For every ligand the protein-ligand contact counts are computed for
        each (ligand type, protein type) pair. An ANM model is then built
        once from the C-alpha atoms of ``protein_pdb`` and two values, the
        number of eigenvalues and the length of the eigenvector array, are
        appended to the descriptor of every ligand.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
            A list or iterable of ligands to build the descriptor or a
            single molecule.

        protein: oddt.toolkit.Molecule or None
            Protein to use as reference. When truthy it replaces
            ``self.protein``; otherwise the stored protein is used.

        protein_pdb: the pdb id of the protein (handed to ``parsePDB``).

        nmbr_modes: the number of normal modes that will be calculated.

        Returns
        -------
        desc: numpy array
            One row per ligand: the flattened contact counts followed by
            ``len(anm.getEigvals())`` and ``len(anm.getEigvecs())``.

        Raises
        ------
        ValueError
            Raised by ``np.vstack`` when ``ligands`` is empty.
        """
        if protein:
            self.protein = protein
        if is_molecule(ligands):
            ligands = [ligands]

        # The type pairs do not depend on the ligand, so build them once.
        # ``list`` keeps the sequence re-usable for every molecule even when
        # ``zip`` returns a one-shot iterator.
        if self.aligned_pairs:
            pairs = list(zip(self.ligand_types, self.protein_types))
        else:
            pairs = [(mol_type, prot_type)
                     for mol_type in self.ligand_types
                     for prot_type in self.protein_types]

        out = []
        for mol in ligands:
            mol_dict = atoms_by_type(mol.atom_dict, self.ligand_types,
                                     self.mode)

            # Keep only protein atoms that lie within the largest cutoff of
            # at least one ligand atom.
            dist = distance(self.protein.atom_dict['coords'],
                            mol.atom_dict['coords'])
            within_cutoff = (dist <= self.cutoff.max()).any(axis=1)
            local_protein_dict = self.protein.atom_dict[within_cutoff]

            prot_dict = atoms_by_type(local_protein_dict, self.protein_types,
                                      self.mode)
            desc = []
            for mol_type, prot_type in pairs:
                d = distance(prot_dict[prot_type]['coords'],
                             mol_dict[mol_type]['coords'])[..., np.newaxis]
                if len(self.cutoff) > 1:
                    # Count contacts per (lower, upper] distance bin.
                    count = ((d > self.cutoff[..., 0]) &
                             (d <= self.cutoff[..., 1])).sum(axis=(0, 1))
                else:
                    count = (d <= self.cutoff).sum()
                desc.append(count)
            out.append(np.array(desc, dtype=int).flatten())

        # Normal mode descriptors: computed once, they only depend on the
        # protein structure and not on the ligand.
        pdb = parsePDB(protein_pdb)
        calphas = pdb.select('calpha')

        anm = ANM('pdb ANM analysis')
        anm.buildHessian(calphas, cutoff=12.0)
        anm.calcModes(n_modes=nmbr_modes)

        nma_desc = np.array([len(anm.getEigvals()), len(anm.getEigvecs())])

        # Append the NMA values to every ligand row. Previously only the
        # first ligand was kept and all others were silently discarded.
        out = [np.append(desc, nma_desc) for desc in out]

        return np.vstack(out)
Ejemplo n.º 16
0
    def dock(self, ligands, protein=None):
        """Automated docking procedure.

        Each ligand is written to a PDBQT file in a temporary directory and
        docked with the configured Autodock Vina executable. The docked
        coordinates are copied onto clones of the source ligand. The
        temporary directory is removed when the method exits, including when
        it exits with an exception.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to dock

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default one
            is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            Array of docked ligand poses. Scores are stored in ``mol.data``
            together with ``vina_rmsd_input`` and ``vina_rmsd_input_min``.

        Raises
        ------
        IOError
            If no receptor file has been set.
        Exception
            If Autodock Vina fails for a ligand and ``self.skip_bad_mols``
            is false.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
                vina_cmd = ([self.executable,
                             '--receptor', self.protein_file,
                             '--ligand', ligand_file,
                             '--out', ligand_outfile] +
                            self.params +
                            ['--cpu', str(self.n_cpu)])
                try:
                    scores = parse_vina_docking_output(
                        subprocess.check_output(vina_cmd,
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    sys.stderr.write(e.output.decode('ascii'))
                    if self.skip_bad_mols:
                        continue  # TODO: print some warning message
                    else:
                        raise Exception(
                            'Autodock Vina failed. Command: "%s"' %
                            ' '.join(e.cmd))

                # docked conformations may have wrong connectivity - use
                # source ligand
                if is_openbabel_molecule(ligand):
                    # NOTE: versions are compared as plain strings here.
                    if oddt.toolkits.ob.__version__ >= '2.4.0':
                        # find the order of PDBQT atoms assigned by OpenBabel
                        with open(ligand_file) as f:
                            write_order = [int(line[7:12].strip())
                                           for line in f
                                           if line[:4] == 'ATOM']
                        new_order = sorted(range(len(write_order)),
                                           key=write_order.__getitem__)
                        # OBMol has 1 based idx
                        new_order = [i + 1 for i in new_order]

                        assert len(new_order) == len(ligand.atoms)
                    else:
                        # Openbabel 2.3.2 does not support preserving atom
                        # order. We read back the PDBQT ligand to get
                        # "correct" bonding.
                        ligand = next(oddt.toolkit.readfile('pdbqt',
                                                            ligand_file))
                        if 'REMARK' in ligand.data:
                            del ligand.data['REMARK']

                docked_ligands = oddt.toolkit.readfile('pdbqt',
                                                       ligand_outfile)
                for docked_ligand, score in zip(docked_ligands, scores):
                    # Renumber atoms to match the input ligand
                    if (is_openbabel_molecule(docked_ligand) and
                            oddt.toolkits.ob.__version__ >= '2.4.0'):
                        docked_ligand.OBMol.RenumberAtoms(new_order)
                    # HACK: copy docked coordinates onto source ligand
                    # We assume that the order of atoms match between ligands
                    clone = ligand.clone
                    clone.clone_coords(docked_ligand)
                    clone.data.update(score)

                    # Calculate RMSD to the input pose
                    clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                    clone.data['vina_rmsd_input_min'] = rmsd(
                        ligand, clone, method='min_symmetry')
                    output_array.append(clone)
        finally:
            # Remove the temporary ligand files even when docking raised, so
            # failed runs do not leave directories behind in ``tmp_dir``.
            rmtree(ligand_dir)
        return output_array