Esempio n. 1
0
        def create(protein,
                   hr,
                   max_constraints=2,
                   weight=5.0,
                   min_hbond_score=0.001,
                   cutoff=8):
            """
            creates a list of :class:`hotspots.hs_docking.HotspotHBondConstraint`

            Candidate atoms come from ``hr._docking_constraint_atoms``; one
            constraint is built for every atom index whose score is strictly
            greater than ``cutoff``.

            :param `ccdc.protein.Protein` protein: the protein to be used for docking
            :param `hotspots.calculation.Result` hr: a result from Fragment Hotspot Maps
            :param int max_constraints: max number of constraints
            :param float weight: the constraint weight (default to be determined)
            :param float min_hbond_score: float between 0.0 (bad) and 1.0 (good) determining the minimum hydrogen bond quality in the solutions.
            :param cutoff: minimum score required to assign the constraint
            :return list: list of :class:`hotspots.hs_docking.HotspotHBondConstraint`
            """
            constraints = hr._docking_constraint_atoms(
                p=protein, max_constraints=max_constraints)

            # This function only builds constraint objects; it deliberately
            # writes nothing to disk because it has no caller-supplied
            # output location.
            return [
                DockerSettings.HotspotHBondConstraint(
                    atoms=[protein.atoms[index]],
                    weight=weight,
                    min_hbond_score=min_hbond_score)
                for index, score in constraints.index_by_score.items()
                if score > cutoff
            ]
def run_conformers(inputs):
    """
    Generate conformers for every molecule in a file and write them out.

    The output file is ``<stem>_conf.mol2`` inside ``outdir``, where
    ``<stem>`` is the input base name up to its first dot. At most 25
    conformers are requested per molecule.

    :param inputs: ``(mol_file, outdir)`` pair, packed into one argument
    :return: None
    """
    mol_file, outdir = inputs

    stem = os.path.basename(mol_file).split(".")[0]
    conf_file = os.path.join(outdir, stem + "_conf.mol2")

    generator = conformer.ConformerGenerator()
    generator.settings.max_conformers = 25

    with io.MoleculeReader(mol_file) as reader:
        with io.MoleculeWriter(conf_file) as writer:
            for parent in reader:
                hits = generator.generate(parent)

                if hits is not None:
                    for idx, hit in enumerate(hits):
                        conf_mol = hit.molecule
                        # DUD-E includes multiple protonation and tautomeric
                        # states, so each written structure gets a unique id
                        # by appending the conformer number. In the rank
                        # analysis only the highest ranked state will count.
                        conf_mol.identifier = f"{conf_mol.identifier}_{idx}"
                        writer.write(conf_mol)
                else:
                    # A failed generation yields None rather than an empty
                    # list; report it and move on to the next structure.
                    print(
                        'WARNING: Conformer generation failed for structure %s in %s!'
                        % (parent.identifier, mol_file))
Esempio n. 3
0
    def get_mol_files_by_indentifiers(self,
                                      list_identifiers,
                                      path_save_mol_dir,
                                      list_solvent_names=None):
        """Save one ``<identifier>.mol2`` file per requested entry.

        ``self.entry_reader`` is replaced by the list of entries looked up
        from ``list_identifiers`` and ``self.delete_solvents`` is called
        before anything is written.

        :param list_identifiers: crystal identifiers, type: list, tuple or array
        :param path_save_mol_dir: directory the mol2 files are saved to
            (created if it does not exist), type: string
        :param list_solvent_names: solvent names forwarded to
            ``delete_solvents``, type: list or None
        :return: None
        """
        # Redefine self.entry_reader as the list of requested entries.
        self.entry_reader = [
            self.entry_reader.entry(id_name) for id_name in list_identifiers
        ]
        # Remove solvents; per the original author's note this also rebuilds
        # self.entry_reader from the solvent-free entries.
        self.delete_solvents(list_solvent_names=list_solvent_names)
        # Create the output directory when it is missing.
        if not os.path.exists(path_save_mol_dir):
            os.makedirs(path_save_mol_dir)
        # Write each entry using an explicit path rather than os.chdir():
        # changing directory inside the loop altered the process-wide working
        # directory and failed on the second entry for a relative path.
        p_bar = tqdm(self.entry_reader)
        for entry in p_bar:
            out_path = os.path.join(path_save_mol_dir,
                                    '%s.mol2' % (entry.identifier))
            with io.MoleculeWriter(out_path) as writer:
                writer.write(entry.molecule)
            p_bar.set_description('正在保存文件:')

        return None
Esempio n. 4
0
def align(inputs):
    """
    Superpose the first chain of one PDB structure onto a reference.

    Both structures are read from ``<input_dir>/<code>.pdb``. When
    ``ref_mol`` is truthy, the first reference ligand whose identifier has
    ``ref_mol`` after the first ``":"`` defines a binding site (12 distance
    cutoff) that is passed to the superposition. The "other" structure is
    then written back over its own input file.

    :param inputs: ``(ref_pdb, ref_mol, other_pdb, input_dir)`` packed into
        one argument
    :return: the RMSD reported by the superposition, or ``999`` if any step
        fails (bad arguments, unreadable file, ligand not found, ...)
    """
    try:
        ref_pdb, ref_mol, other_pdb, input_dir = inputs

        ref = Protein.from_file(os.path.join(input_dir, f"{ref_pdb}.pdb"))

        other_path = os.path.join(input_dir, f"{other_pdb}.pdb")
        other = Protein.from_file(other_path)

        if ref_mol:
            # Indexing [0] raises IndexError when no ligand matches, which
            # is reported as the 999 sentinel below.
            ref_mol_obj = [
                lig for lig in ref.ligands
                if lig.identifier.split(":")[1] == ref_mol
            ][0]
            ref_bind_site = Protein.BindingSiteFromMolecule(
                protein=ref, molecule=ref_mol_obj, distance=12)
        else:
            ref_bind_site = None

        chain_superposition = Protein.ChainSuperposition()
        # other chains already striped; the second return value is unused
        rms, _ = chain_superposition.superpose(ref.chains[0],
                                               other.chains[0],
                                               binding_site1=ref_bind_site)

        with io.MoleculeWriter(other_path) as w:
            w.write(other)

        return rms

    except Exception:
        # Best-effort worker: any ordinary failure maps to the sentinel, but
        # KeyboardInterrupt/SystemExit are allowed to propagate.
        return 999
Esempio n. 5
0
 def run(self):
     """Write every ligand whose identifier lacks 'HEM' to the output file.

     The structure is read from ``self.input().path``; ligand bonds are
     detected and hydrogens added before the ligands are written to
     ``self.output().path``.
     """
     structure = Protein.from_file(self.input().path)
     structure.detect_ligand_bonds()
     structure.add_hydrogens()
     with io.MoleculeWriter(self.output().path) as writer:
         for ligand in structure.ligands:
             if 'HEM' in ligand.identifier:
                 continue
             writer.write(ligand)
Esempio n. 6
0
    def _write_protein(path, prot):
        """
        Save a protein as ``protein.pdb`` inside the given directory.

        :param path: directory the file is written to
        :param prot: the protein to write
        :type prot: `ccdc.protein.Protein`

        """
        target = join(path, "protein.pdb")
        with io.MoleculeWriter(target) as pdb_out:
            pdb_out.write(prot)
Esempio n. 7
0
    def _protein_preparation(self):
        """Strip waters, ligands and metals from ``self.prot``, then save it.

        The cleaned protein is written to ``self.fname``.
        """
        protein = self.prot

        protein.remove_all_waters()

        # Ligands are removed one by one, addressed by identifier.
        for ligand in protein.ligands:
            protein.remove_ligand(ligand.identifier)
        protein.remove_all_metals()

        with io.MoleculeWriter(self.fname) as writer:
            writer.write(protein)
Esempio n. 8
0
 def _output_feature_centroid(self):
     """Write the grid centroid of every feature as a pseudo-atom.

     One atom is added per entry in ``self.features``: its element symbol
     is chosen by feature type (apolar -> C, acceptor -> N, donor -> O) and
     its label is the feature's index. The molecule is written to
     ``cenroid.mol2`` in the current working directory.
     """
     symbol_by_type = {"apolar": "C", "acceptor": "N", "donor": "O"}
     centroids = Molecule(identifier="centroids")
     for index, feature in enumerate(self.features):
         position = feature.grid.centroid()
         pseudo_atom = Atom(atomic_symbol=symbol_by_type[feature.feature_type],
                            atomic_number=14,
                            coordinates=position,
                            label=str(index))
         centroids.add_atom(pseudo_atom)
     from ccdc import io
     with io.MoleculeWriter("cenroid.mol2") as writer:
         writer.write(centroids)
Esempio n. 9
0
    def _write_protein(self, prot, out_dir=None):
        """
        Write the protein to ``protein.pdb``.

        :param prot: the protein to write
        :param out_dir: target directory; when falsy, ``self.out_dir`` is used
        :return: None
        """
        directory = out_dir if out_dir else self.out_dir
        with io.MoleculeWriter(join(directory, "protein.pdb")) as pdb_writer:
            pdb_writer.write(prot)
Esempio n. 10
0
    def testalign(self):
        """Download and prepare 2VTA and 1HCL, align them, save the result.

        Both structures are prepared with entity index 0 in ``self.tmp``;
        the aligned structure returned by ``align`` is written to
        ``aligned_<identifier>.pdb`` in the same directory.
        """
        reference, target = "2VTA", "1HCL"
        pdbs = [reference, target]
        download(pdbs, out_dir=self.tmp)

        for code in pdbs:
            prepare_protein(code, 0, self.tmp)

        other, rms = align([reference, 'LZ11301', target, self.tmp])

        out_path = os.path.join(self.tmp, f"aligned_{other.identifier}.pdb")
        with io.MoleculeWriter(out_path) as w:
            w.write(other)
Esempio n. 11
0
def protoss_download(args):
    """
    Protonate a PDB entry with Protoss and save the protein to disk.

    :param args: ``(pdb, out_dir)`` packed into one argument; the result is
        written to ``<out_dir>/<pdb>.pdb``
    :return: path of the written file, or ``None`` if any step failed (an
        ``ERROR <pdb>`` line is printed in that case)
    """
    # Bound up front so the error message can always be formatted, even when
    # unpacking ``args`` is the step that fails.
    pdb = None
    try:
        pdb, out_dir = args
        protoss = Protoss()
        result = protoss.add_hydrogens(pdb_code=pdb)

        out_path = os.path.join(out_dir, f"{pdb}.pdb")

        with io.MoleculeWriter(out_path) as w:
            w.write(result.protein)

        return out_path

    except Exception:
        # Best-effort worker: report and carry on, but let
        # KeyboardInterrupt/SystemExit propagate.
        print(f"ERROR {pdb}")
        return None
Esempio n. 12
0
    def generate_fitting_points(self,
                                hr,
                                volume=400,
                                threshold=17,
                                mode='threshold'):
        """
        uses the Fragment Hotspot Maps to generate GOLD fitting points.

        GOLD fitting points are used to help place the molecules into the protein cavity. Pre-generating these fitting
        points using the Fragment Hotspot Maps helps to bias results towards making Hotspot interactions.

        The points are written as pseudo-atoms to ``fit_pts.mol2`` in a fresh temporary directory and the path is
        stored on ``self.fitting_points_file``.

        :param `hotspots.result.Result` hr: a Fragment Hotspot Maps result
        :param int volume: volume of the occupied by fitting points in Angstroms ^ 3 (used by 'bcv' mode)
        :param float threshold: points above this value will be included in the fitting points (applied in both modes)
        :param str mode: 'threshold'- assigns fitting points based on a score cutoff or 'bcv'- assigns fitting points from best continuous volume analysis (recommended)
        :raises TypeError: if ``mode`` is neither 'threshold' nor 'bcv'
        """
        # Resolve the point source first so an unsupported mode fails before
        # any temporary directory is created.
        if mode == 'threshold':
            dic = hr.super_grids["apolar"].grid_value_by_coordinates(
                threshold=threshold)

        elif mode == 'bcv':
            extractor = Extractor(hr=hr, volume=volume, mode="global")
            bv = extractor.extracted_hotspots[0].best_island
            # Honour the caller's threshold; the default (17) matches the
            # value that used to be hard-coded here.
            dic = bv.grid_value_by_coordinates(threshold=threshold)

        else:
            raise TypeError(
                "{} not supported, see documentation for details".format(mode))

        mol = molecule.Molecule(identifier="fitting_pts")
        for score, v in dic.items():
            for pts in v:
                atm = molecule.Atom(atomic_symbol='C',
                                    atomic_number=14,
                                    label='{:.2f}'.format(score),
                                    coordinates=pts)
                # The grid score is carried in the partial charge field.
                atm.partial_charge = score
                mol.add_atom(atom=atm)

        temp = tempfile.mkdtemp()
        fname = os.path.join(temp, 'fit_pts.mol2')
        with io.MoleculeWriter(fname) as w:
            w.write(mol)

        self.fitting_points_file = fname
Esempio n. 13
0
    def _write_pharmacophore(self, pharmacophore):
        """
        Write a pharmacophore, and its label molecule, to ``self.out_dir``.

        One ``pharmacophore<fmat>`` file is written per entry in
        ``self.settings.pharmacophore_format``; the label returned by
        ``self.get_label`` goes to ``label_threshold_<identifier>.mol2``.

        :param pharmacophore: the pharmacophore to write
        :return: None
        """
        targets = []
        for fmat in self.settings.pharmacophore_format:
            targets.append(join(self.out_dir, "pharmacophore" + fmat))
        for target in targets:
            pharmacophore.write(target)

        label = self.get_label(pharmacophore)
        label_name = "label_threshold_{}.mol2".format(pharmacophore.identifier)
        with io.MoleculeWriter(join(self.out_dir, label_name)) as writer:
            writer.write(label)
Esempio n. 14
0
    def generate_library(self):
        """Decorate ``self.fragment`` and write the resulting molecules.

        The fragment is read from a mol2 file and converted to SMILES; it is
        passed to ``self.supplier`` together with the two SD suppliers, and
        each returned value is converted with ``self.from_smiles``. The
        molecules are written to ``decorated_fragments.mol2`` in the current
        working directory.

        :return: list of the decorated molecules
        """
        # Single-argument call form prints identically on Python 2 and 3.
        print("Decorate fragment...")
        fragment = Chem.MolFromMol2File(self.fragment)

        # Placeholder inputs: the two SD files are read from fixed file
        # names in the current directory.
        smiles = Chem.MolToSmiles(fragment)
        terminal = Chem.SDMolSupplier("r1-20.sdf")
        spacer = Chem.SDMolSupplier("r2-20.sdf")

        vl = self.supplier(smiles, terminal, spacer)
        mols = [self.from_smiles(v) for v in vl]

        with io.MoleculeWriter("decorated_fragments.mol2") as w:
            for decorated in mols:
                w.write(decorated)

        return mols
Esempio n. 15
0
    def extract_ligands(self, other_id, lig_id):
        """
        Extract the relevant ligand from an aligned PDB to a mol2 file.

        A ligand is relevant when ``lig_id`` occurs in the first three
        characters of the part of its identifier after the first ``":"``.
        The first relevant ligand is written to
        ``self.extracted_ligands[other_id][lig_id]``.

        :param str other_id: position in list of 'other' proteins
        :param lig_id: ligand code to look for
        :return: None
        """
        # inputs
        other = Protein.from_file(self.other_pdbs[other_id])

        # tasks
        other.detect_ligand_bonds()
        print([a.identifier for a in other.ligands])
        print(other_id, lig_id)
        relevant = []
        for ligand in other.ligands:
            code = ligand.identifier.split(":")[1][0:3]
            if lig_id in code:
                relevant.append(ligand)

        # output: if more than one ligand is detected, only the first is written
        out_path = self.extracted_ligands[other_id][lig_id]
        with io.MoleculeWriter(out_path) as writer:
            writer.write(relevant[0])
Esempio n. 16
0
    def run_superstar(self, prot, out_dir):
        """
        Run SuperStar as a command-line subprocess.

        Everything happens inside ``self.settings.working_directory``: the
        protein (if given) is written to ``protein.pdb``, the inputs are
        prepared via ``self._get_inputs`` and the executable is invoked on
        ``self.fname``.

        :param prot: a :class:`ccdc.protein.Protein` instance, or None to skip writing it
        :param out_dir: str, output directory
        :return: a ``SuperstarResult`` built from ``self.settings``
        """

        with utilities.PushDir(self.settings.working_directory):
            if prot is not None:
                with io.MoleculeWriter('protein.pdb') as writer:
                    writer.write(prot)
            self._get_inputs(out_dir)

            # Process environment extended with the SuperStar-specific entries.
            run_env = os.environ.copy()
            run_env.update(self.settings._superstar_env)

            command = ' '.join([self.settings._superstar_executable, self.fname])
            # The shell is used on every platform except Windows.
            through_shell = sys.platform != 'win32'
            subprocess.call(command, shell=through_shell, env=run_env)
        return SuperstarResult(self.settings)
Esempio n. 17
0
    def run(self, protein, grid_spacing=0.5):
        """
        executes the command line call

        The protein is written to ``protein.pdb`` inside ``self.temp`` and
        the executable named by the ``GHECOM_EXE`` environment variable is
        run there with ``self.input`` as its input PDB.

        NB: different versions of ghecom have different commandline args

        :param protein: protein
        :param grid_spacing: grid spacing, must be the same used in hotspot calculation

        :type protein: `ccdc.protein.Protein`
        :type grid_spacing: float

        :return: path of the ghecom output file (``self.output`` inside ``self.temp``)
        """
        with PushDir(self.temp):
            with io.MoleculeWriter('protein.pdb') as writer:
                writer.write(protein)

            # Run ghecom only after the writer has been closed, so the PDB
            # file is complete on disk before the external program reads it
            # (presumably ``self.input`` names this file; confirm).
            cmd = f"{os.environ['GHECOM_EXE']} -ipdb {self.input} -M M -gw {grid_spacing} -rli 2.5 -rlx 9.5 -opocpdb {self.output}"
            os.system(cmd)

        return os.path.join(self.temp, self.output)
Esempio n. 18
0
    def _prep_protein(self):
        """
        Strip waters and all non-HEM ligands from the apo protein and save it.

        The structure is read from ``self.apo_protein`` and written to
        ``self.apo_prep``. Per the original note, the Hotspot method requires
        the cavity to be empty.

        :return: None
        """

        # input
        structure = Protein.from_file(self.apo_protein)

        # task
        structure.remove_all_waters()
        structure.detect_ligand_bonds()
        for ligand in structure.ligands:
            # Ligands with 'HEM' in their identifier are kept.
            if 'HEM' in ligand.identifier:
                continue
            structure.remove_ligand(ligand.identifier)

        # output
        with io.MoleculeWriter(self.apo_prep) as writer:
            writer.write(structure)
Esempio n. 19
0
def prepare_protein(pdb, entity, out_dir):
    """
    Reduce ``<out_dir>/<pdb>.pdb`` to a single chain, in place.

    Waters and metals are removed, then every chain other than
    ``prot.chains[entity]`` is dropped together with any ligand whose
    identifier starts with a dropped chain id. The input file is overwritten.

    :param pdb: PDB code used to build the file name
    :param entity: index into the protein's chains selecting the chain to keep
    :param out_dir: directory containing the PDB file
    :return: None
    """
    # protein prep
    prot_file = os.path.join(out_dir, f"{pdb}.pdb")
    prot = Protein.from_file(prot_file)
    prot.remove_all_waters()
    prot.remove_all_metals()
    prot.detect_ligand_bonds()

    all_ids = {chain.identifier for chain in prot.chains}
    kept_id = prot.chains[entity].identifier
    discard_chains = all_ids.difference({kept_id})

    for chain_id in discard_chains:
        prot.remove_chain(chain_id)

    for ligand in prot.ligands:
        ligand_chain = ligand.identifier.split(":")[0]
        if ligand_chain in discard_chains:
            prot.remove_ligand(ligand.identifier)

    # overwrite protein
    with io.MoleculeWriter(prot_file) as writer:
        writer.write(prot)
Esempio n. 20
0
def chunk_files(mol_file, outdir, chunk_size=100):
    """
    Split a molecule file into mol2 files of at most ``chunk_size`` molecules.

    Output files are named ``<stem>_chunk<i>.mol2`` (``<stem>`` is the input
    base name up to its first dot). Every molecule's identifier gets the
    suffix ``_<chunk index>_<position in chunk>``.

    :param mol_file: path of the molecule file to split
    :param outdir: directory for the chunk files; created (including missing
        parents) if it does not exist
    :param chunk_size: maximum number of molecules per output file
    :return: list of the written file paths, in chunk order
    """
    os.makedirs(outdir, exist_ok=True)

    # Read everything up front and release the reader straight away.
    with io.MoleculeReader(mol_file) as reader:
        mols = list(reader)

    stem = os.path.basename(mol_file).split('.')[0]
    outfiles = []

    for i, start in enumerate(range(0, len(mols), chunk_size)):
        outfile = os.path.join(outdir, f"{stem}_chunk{i}.mol2")
        outfiles.append(outfile)

        with io.MoleculeWriter(outfile) as w:
            for j, mol in enumerate(mols[start:start + chunk_size]):
                mol.identifier = f"{mol.identifier}_{i}_{j}"
                w.write(mol)

    return outfiles
Esempio n. 21
0
    def _write_grids(self,
                     grid_dict,
                     buriedness=None,
                     mesh=None,
                     out_dir=None):
        """
        Write the grids, and optional extras, to the output directory.

        Each grid in ``grid_dict`` is written as ``<key><grid_extension>``
        into ``out_dir`` (or ``self.out_dir`` when ``out_dir`` is falsy).

        NOTE(review): the buriedness grid, the mesh and the label files are
        always written to ``self.out_dir``, even when ``out_dir`` is given;
        confirm whether that is intended.

        :param grid_dict: mapping of name to grid
        :param buriedness: optional buriedness grid
        :param mesh: optional mesh grid
        :param out_dir: optional alternative directory for the grids
        :return: None
        """
        grid_dir = out_dir if out_dir else self.out_dir
        for probe, grid in grid_dict.items():
            grid_name = "{}{}".format(probe, self.settings.grid_extension)
            grid.write(join(grid_dir, grid_name))

        extras = (("buriedness{}", buriedness), ("mesh{}", mesh))
        for template, extra in extras:
            if extra:
                extra.write(
                    join(self.out_dir,
                         template.format(self.settings.grid_extension)))

        if not self.settings.grid_labels:
            return

        # One label molecule per isosurface threshold, keyed by file name.
        labels = {}
        for threshold in self.settings.isosurface_threshold:
            label_name = "label_threshold_{}.mol2".format(threshold)
            labels[label_name] = self.get_label(grid_dict, threshold=threshold)

        for label_name, label in labels.items():
            with io.MoleculeWriter(join(self.out_dir, label_name)) as writer:
                writer.write(label)
Esempio n. 22
0
csd_and_updates_reader = io.EntryReader(csd_and_updates)


#for i in range(1000):
#    out_q.put(["srandom text"])

# Export the heaviest component of each molecule as a PDB file and queue a
# [identifier, file name, SMILES] record for it. Molecules that fail a filter
# or raise are collected in `exceptions` and skipped.
exceptions = []
i = 0
# NOTE(review): the loop reads from `csd_entry_reader`, not from the
# `csd_and_updates_reader` created above; confirm which reader is intended.
for mol in csd_entry_reader.molecules():
    start = time.time()
    try:
        ccdc_id = mol.identifier
        mol = mol.heaviest_component
        mol_smiles = mol.smiles
        # Explicit checks instead of `assert`, which is stripped under -O.
        if not len(mol_smiles) < 200:
            raise ValueError("SMILES too long for %s" % ccdc_id)
        outfile = ccdc_id + ".pdb"
        out_path = os.path.join(db_root, pdb_folder, outfile)
        if mol.is_polymeric:
            raise ValueError("%s is polymeric" % ccdc_id)
        if not mol.is_organic:
            raise ValueError("%s is not organic" % ccdc_id)
        # The context manager closes the writer; no explicit close() needed.
        with io.MoleculeWriter(out_path) as filehandle:
            filehandle.write(mol)
        out_q.put([ccdc_id, outfile, mol_smiles])
    except Exception as e:
        exceptions.append(e)
    # Guard against a zero elapsed time on coarse clocks.
    elapsed = time.time() - start
    rate = 1 / elapsed if elapsed > 0 else float("inf")
    # Single pre-formatted argument prints identically on Python 2 and 3.
    print("exps: %.3f total: %d exceptions: %d" % (rate, i, len(exceptions)))
    i += 1

stop_event.set()
print("closing table with queue")
Esempio n. 23
0
from ccdc.protein import Protein
from ccdc import io


# Round-trip check: strip waters from 1xkk, write the result as PDB and as
# mol2, read both files back and print summary statistics for comparison.
p0 = Protein.from_file("1xkk.pdb")
p1 = p0.copy()
p1.remove_all_waters()

with io.MoleculeWriter("1xkk_dry.pdb") as w:
    w.write(p1)

with io.MoleculeWriter("1xkk_dry.mol2") as w:
    w.write(p1)

p2 = Protein.from_file("1xkk_dry.pdb")
p3 = Protein.from_file("1xkk_dry.mol2")


# p2 was read back from the .pdb file and p3 from the .mol2 file, so the
# labels must be in that order.
for p, label in zip([p0, p2, p3], ["original", "pdbwriter", "mol2writer"]):
    print(label)
    print(f"# atoms: {len(p.atoms)}")
    print(f"# residues: {len(p.residues)}, # waters: {len(p.waters)}")
    print(p.residues[30:50])
    print(" ")
Esempio n. 24
0
#!/usr/bin/env python

from ccdc import io
from ccdc.search import SubstructureSearch, MoleculeSubstructure, SMARTSSubstructure

# main
if __name__ == "__main__":
    # Query patterns; each one is run as its own SMARTS substructure search.
    strings = (
        "NCC(=O)NCC(=O)",
        "N(C)CC(=O)NCC(=O)",
        "NCC(=O)N(C)CC(=O)",
        "N(C)CC(=O)N(C)CC(=O)",
    )

    for pepsmile in strings:
        pattern = SMARTSSubstructure(pepsmile)
        searcher = SubstructureSearch()
        searcher.add_substructure(pattern)
        print("Searching...")
        # At most one hit is kept per structure.
        hits = searcher.search(max_hits_per_structure=1)
        print(len(hits), "hits found")
        for hit in hits:
            # Each hit molecule is saved as <identifier>.pdb.
            out_path = "from_CSD/nmeth/" + hit.identifier + ".pdb"
            with io.MoleculeWriter(out_path) as pdb_writer:
                pdb_writer.write(hit.molecule)
Esempio n. 25
0
    conformers_dir = '%s-conformers' % mol_name
    if not os.path.isdir(conformers_dir):
        os.makedirs(conformers_dir)
else:
    print('No conformers found!')
    args.nominimise = True

# Minimise conformers and save ---------------------------------------------------------------------
if args.nominimise:
    print('Skiping minimisation...')
    if conformers is not None:
        for conf_idx, conf in enumerate(conformers):
            conf_path = os.path.join(
                conformers_dir,
                '%s-%i.%s' % (mol_name, conf_idx + 1, args.format))
            with io.MoleculeWriter(conf_path) as molecule_writer:
                molecule_writer.write(conf.molecule)
        conformers_mol = [c.molecule for c in conformers]
        print('Conformers saved in %s | format: %s\n' %
              (conformers_dir, args.format))
    else:
        conformers_mol = [mol]
else:
    # Run minimisation
    print('Minimising molecular geometry using Tripos force field...')
    molecule_minimiser = conformer.MoleculeMinimiser(
    )  # Uses Tripos force field
    min_conformers = []
    log_data = {}
    for conf_idx, conf in enumerate(conformers):
        score, rmsd = conf.normalised_score, conf.rmsd(
Esempio n. 26
0
    def from_pdb(pdb_code,
                 chain,
                 out_dir=None,
                 representatives=None,
                 identifier="LigandBasedPharmacophore"):
        """
        creates a Pharmacophore Model from a PDB code.

        This method is used for the creation of Ligand-Based pharmacophores. The PDB is searched for protein-ligand
        complexes of the same UniProt code as the input. These PDBs are aligned, the ligands are clustered and the
        density of atom types at a given point is assigned to a grid.

        When ``representatives`` is given, clustering is skipped and the representative (PDB code, HET id) pairs are
        read from that file instead (one ``pdb_code,hetid,smiles`` record per line).

        :param str pdb_code: single PDB code from the target system
        :param str chain: chain of interest
        :param str out_dir: path to output directory; when omitted, no intermediate files are written
        :param representatives: path to .dat file containing previously clustered data (time saver)
        :param str identifier: identifier for the Pharmacophore Model

        :return: :class:`hotspots.hs_pharmacophore.PharmacophoreModel`
        :raises AttributeError: if a representative is neither an object with ``structure_id``/``chemical_id``
            nor a ``(pdb_code, hetid)`` pair


        >>> from hotspots.hs_pharmacophore import PharmacophoreModel
        >>> from hotspots.result import Results
        >>> from hotspots.hs_io import HotspotWriter
        >>> from ccdc.protein import Protein
        >>> from pdb_python_api import PDBResult


        >>> # get the PDB ligand-based Pharmacophore for CDK2
        >>> model = PharmacophoreModel.from_pdb("1hcl", "A")

        >>> # the models grid data is stored as PharmacophoreModel.dic
        >>> # download the PDB file and create a Results
        >>> PDBResult("1hcl").download(<output_directory>)
        >>> result = Results(protein=Protein.from_file("<output_directory>/1hcl.pdb"), super_grids=model.dic)
        >>> with HotspotWriter("<output_directory>") as w:
        >>>     w.write(result)

        """
        temp = tempfile.mkdtemp()
        ref = PDBResult(pdb_code)
        ref.download(out_dir=temp, compressed=False)

        # Only set when ligands are clustered in this call and an output
        # location is known; they gate the summary files written below.
        top = None
        silhouette = None

        if representatives:
            print("Reading representative PDB codes ...")
            with open(representatives, "r") as f:
                entries = f.read().splitlines()

            reps = []
            for entry in entries:
                # Exactly three comma-separated fields are expected; the
                # SMILES field is not needed here.
                rep_code, hetid, _smiles = entry.split(",")
                reps.append((rep_code, hetid))

        else:
            accession_id = PDBResult(
                pdb_code).protein.sub_units[0].accession_id
            results = PharmacophoreModel._run_query(accession_id)
            ligands = PharmacophoreModel._get_ligands(results)

            if out_dir:
                # Intermediate files go two directory levels above out_dir.
                top = os.path.dirname(os.path.dirname(out_dir))
                PharmacophoreModel._to_file(ligands, top,
                                            fname="all_ligands.dat")

            # Roughly one cluster per five ligands, but never fewer than two.
            k = int(round(len(ligands) / 5))
            if k < 2:
                k = 2
            cluster_dict, silhouette = PharmacophoreModel._cluster_ligands(
                n=k, ligands=ligands)
            reps = [l[0] for l in cluster_dict.values() if len(l) != 0]

        targets = []

        for rep in reps:
            # Representatives are either query results (attribute access) or
            # (pdb_code, hetid) pairs read from file (index access).
            try:
                r = PDBResult(identifier=rep.structure_id)
                r.clustered_ligand = rep.chemical_id

            except Exception:
                try:
                    r = PDBResult(identifier=rep[0])
                    r.clustered_ligand = rep[1]
                except Exception:
                    raise AttributeError(
                        "cannot interpret representative {!r}".format(rep))

            r.download(out_dir=temp, compressed=False)
            targets.append(r)

        prots, ligands = PharmacophoreModel._align_proteins(
            reference=ref, reference_chain=chain, targets=targets)

        if top is not None:
            # Best effort: a failure to save the clustering summary must not
            # abort model creation, but it is reported instead of hidden.
            try:
                with open(os.path.join(top, "silhouette.dat"), "w") as sfile:
                    sfile.write("{}".format(silhouette))
                PharmacophoreModel._to_file(reps, top,
                                            fname="representatives.dat")

            except Exception as err:
                print("WARNING: could not write clustering summary: {}".format(err))

        if out_dir:
            with io.MoleculeWriter(os.path.join(out_dir,
                                                "aligned_mols.mol2")) as w:
                for l in ligands:
                    w.write(l)

        return PharmacophoreModel.from_ligands(ligands=ligands,
                                               identifier=identifier)
Esempio n. 27
0
    def from_ligands(ligands, identifier, protein=None, settings=None):
        """
        creates a Pharmacophore Model from a collection of overlaid ligands

        CrossMiner feature definitions are detected on every ligand and the first sphere of each detected feature is
        stamped onto an "apolar", "acceptor" or "donor" grid. The peaks of those grids become the model's features.

        :param `ccdc.molecule.Molecule` ligands: ligands from which the Model is created (must support indexing)
        :param str identifier: identifier for the Pharmacophore Model
        :param `ccdc.protein.Protein` protein: target system that the model has been created for
        :param `hotspots.hs_pharmacophore.PharmacophoreModel.Settings` settings: Pharmacophore Model settings

        :return: :class:`hotspots.hs_pharmacophore.PharmacophoreModel`
        :raises ImportError: if the CrossMiner feature definitions cannot be read


        >>> from ccdc.io import MoleculeReader
        >>> from hotspots.hs_pharmacophore import PharmacophoreModel

        >>> mols = MoleculeReader("ligand_overlay_model.mol2")
        >>> model = PharmacophoreModel.from_ligands(mols, "ligand_overlay_pharmacophore")
        >>> # write to .json and search in pharmit
        >>> model.write("model.json")

        """
        cm_dic = crossminer_features()
        blank_grd = Grid.initalise_grid(
            [a.coordinates for l in ligands for a in l.atoms])
        feature_dic = {
            "apolar": blank_grd.copy(),
            "acceptor": blank_grd.copy(),
            "donor": blank_grd.copy()
        }

        if not settings:
            settings = PharmacophoreModel.Settings()

        if isinstance(ligands[0], Molecule):
            # Molecules are round-tripped through a temporary mol2 file so
            # that they can be re-read as crystals for feature detection.
            temp = tempfile.mkdtemp()
            lig_path = join(temp, "ligs.mol2")

            with io.MoleculeWriter(lig_path) as w:
                for l in ligands:
                    w.write(l)
            ligands = list(io.CrystalReader(lig_path))

        try:
            Pharmacophore.read_feature_definitions()
        except Exception:
            # Only ordinary failures are translated; KeyboardInterrupt and
            # SystemExit propagate unchanged.
            raise ImportError("Crossminer is only available to CSD-Discovery")

        # Feature definitions that are not mapped onto the model's grids.
        excluded = ('exit_vector', 'heavy_atom', 'hydrophobe', 'fluorine',
                    'bromine', 'chlorine', 'iodine', 'halogen')
        feature_definitions = [
            fd for fd in Pharmacophore.feature_definitions.values()
            if fd.identifier not in excluded
        ]

        for fd in feature_definitions:
            detected = [fd.detect_features(ligand) for ligand in ligands]
            all_feats = [f for l in detected for f in l]

            # The grid lookup stays inside the loop so a definition with no
            # detected features never needs an entry in cm_dic.
            for f in all_feats:
                feature_dic[cm_dic[fd.identifier]].set_sphere(
                    f.spheres[0].centre, f.spheres[0].radius, 1)

        features = []
        for feat, feature_grd in feature_dic.items():
            peaks = feature_grd.get_peaks(min_distance=4, cutoff=1)
            for p in peaks:
                coords = Coordinates(p[0], p[1], p[2])
                projected_coordinates = None
                # Projections are only computed for polar features, and only
                # when a protein is available.
                if feat in ("donor", "acceptor") and protein:
                    projected_coordinates = _PharmacophoreFeature.get_projected_coordinates(
                        feat, coords, protein, settings)
                features.append(
                    _PharmacophoreFeature(
                        projected=None,
                        feature_type=feat,
                        feature_coordinates=coords,
                        projected_coordinates=projected_coordinates,
                        score_value=feature_grd.value_at_coordinate(
                            coords, position=False),
                        vector=None,
                        settings=settings))

        return PharmacophoreModel(settings,
                                  identifier=identifier,
                                  features=features,
                                  protein=protein,
                                  dic=feature_dic)
Esempio n. 28
0
    def write(self, fname):
        """
        writes out pharmacophore. Supported formats:

        - ".cm" (*CrossMiner*),
        - ".json" (`Pharmit <http://pharmit.csb.pitt.edu/search.html/>`_),
        - ".py" (*PyMOL*),
        - ".csv",
        - ".mol2"

        :param str fname: path to output file
        """
        extension = splitext(fname)[1]

        if extension == ".cm":

            print "WARNING! Charged features not currently supported in CrossMiner!"
            pharmacophore = self._get_crossminer_pharmacophore()
            pharmacophore.write(fname)

        elif extension == ".csv":
            with open(fname, "wb") as csv_file:
                csv_writer = csv.writer(csv_file, delimiter=",")
                line = 'Identifier, Feature_type, x, y, z, score, ' \
                       'projected_x, projected_y, projected_z, ' \
                       'vector_x, vector_y, vector_z'

                for feature in self._features:
                    line += "{0},{1},{2},{3},{4},{5}".format(
                        self.identifier, feature.feature_type,
                        feature.feature_coordinates.x,
                        feature.feature_coordinates.y,
                        feature.feature_coordinates.z, feature.score_value)
                    if feature.projected_coordinates:
                        line += ",{0},{1},{2}".format(
                            feature.projected_coordinates.x,
                            feature.projected_coordinates.y,
                            feature.projected_coordinates.z)
                    else:
                        line += ",0,0,0"

                    if feature.vector:
                        line += ",{0},{1},{2}".format(feature.vector.x,
                                                      feature.vector.y,
                                                      feature.vector.z)
                    else:
                        line += ",0,0,0"

                    l = line.split(",")
                    csv_writer.writerow(l)

        elif extension == ".py":
            with open(fname, "wb") as pymol_file:
                lfile = "label_threshold_{}.mol2".format(self.identifier)

                pymol_out = pymol_imports()
                pymol_out += pymol_arrow()
                lines = self._get_pymol_pharmacophore(lfile)
                pymol_out += lines
                pymol_file.write(pymol_out)

            label = self.get_label(self)
            with io.MoleculeWriter(join(dirname(fname), lfile)) as writer:
                writer.write(label)

        elif extension == ".json":
            with open(fname, "w") as pharmit_file:
                pts = []
                interaction_dic = {
                    'apolar': 'Hydrophobic',
                    'donor': 'HydrogenDonor',
                    'acceptor': 'HydrogenAcceptor',
                    'negative': 'NegativeIon',
                    'positive': 'PositiveIon'
                }

                for feat in self._features:
                    if feat.vector:
                        point = {
                            "name": interaction_dic[feat.feature_type],
                            "hasvec": True,
                            "x": feat.feature_coordinates.x,
                            "y": feat.feature_coordinates.y,
                            "z": feat.feature_coordinates.z,
                            "radius": feat.settings.radius,
                            "enabled": True,
                            "vector_on": feat.settings.vector_on,
                            "svector": {
                                "x": feat.vector.x,
                                "y": feat.vector.y,
                                "z": feat.vector.z
                            },
                            "minsize": "",
                            "maxsize": "",
                            "selected": False
                        }
                    else:
                        point = {
                            "name": interaction_dic[feat.feature_type],
                            "hasvec": False,
                            "x": feat.feature_coordinates.x,
                            "y": feat.feature_coordinates.y,
                            "z": feat.feature_coordinates.z,
                            "radius": feat.settings.radius,
                            "enabled": True,
                            "vector_on": feat.settings.vector_on,
                            "svector": {
                                "x": 0,
                                "y": 0,
                                "z": 0
                            },
                            "minsize": "",
                            "maxsize": "",
                            "selected": False
                        }
                    pts.append(point)
                pharmit_file.write(json.dumps({"points": pts}))

        elif extension == ".mol2":
            mol = Molecule(identifier="pharmacophore_model")
            atom_dic = {
                "apolar": 'C',
                "donor": 'N',
                "acceptor": 'O',
                "negative": 'S',
                "positve": 'H'
            }

            pseudo_atms = [
                Atom(atomic_symbol=atom_dic[feat.feature_type],
                     atomic_number=14,
                     coordinates=feat.feature_coordinates,
                     label=str(feat.score_value)) for feat in self.features
            ]

            for a in pseudo_atms:
                mol.add_atom(a)

            with io.MoleculeWriter(fname) as w:
                w.write(mol)

        elif extension == ".grd":
            g = self._as_grid()
            g.write(fname)

        else:
            raise TypeError(
                """""{}" output file type is not currently supported.""".
                format(extension))
Esempio n. 29
0
    # For every PDB entry of the current target, build a reference
    # pharmacophore, write it out, and save the associated hotspot result and
    # aligned ligands into "<base>/<target>/<pdb>/reference".
    #
    # print is written in call form so the snippet parses on both Python 2 and
    # Python 3 (a single parenthesised argument prints identically on both).
    print(target)
    for pdb in pdbs:
        # NOTE(review): `chain` is not used below; the lookup is kept because
        # it still raises KeyError for a pdb that is missing from `chains`.
        chain = chains[pdb]
        ligand_id = ligands[pdb]

        out_dir = os.path.join(base, target, pdb, "reference")
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        try:
            p = PharmacophoreModel._from_siena(pdb,
                                               ligand_id,
                                               mode,
                                               target,
                                               out_dir=out_dir)
            p.write(os.path.join(out_dir, "reference_pharmacophore.py"))

            # The protein is taken from the previously written result archive
            # of this pdb.
            prot = hs_io.HotspotReader(
                os.path.join(base, target, pdb, "out.zip")).read().protein

            hs = Results(protein=prot, super_grids=p.dic)

            with hs_io.HotspotWriter(out_dir) as wf:
                wf.write(hs)

            with io.MoleculeWriter(os.path.join(out_dir, "aligned.mol2")) as w:
                for l in p.representatives:
                    w.write(l)
        except RuntimeError:
            # Only RuntimeError is tolerated; any other failure aborts the loop.
            # NOTE(review): the message reports `target`, not the `pdb` that
            # failed -- confirm whether `pdb` was intended.
            print("skipped {}".format(target))
Esempio n. 30
0
    def run(self):
        """
        Collect the ligands of all PDB entries found for a UniProt accession,
        overlay them on a reference structure and write the result to disk.

        The following attributes of ``self.args`` are read:
        ``output_directory``, ``uniprot``, ``max_resolution``, ``processes``,
        ``ref_pdb``, ``ref_chain``, ``binding_site_ligand``, ``no_solvents``
        and ``cluster``.

        Two files are written into ``self.args.output_directory``:
        ``ligand_overlay.csv`` (one row per retained ligand) and
        ``ligand_overlay.mol2`` (the retained ligands themselves).

        :raises ValueError: if ``ref_chain`` is not a chain of the reference protein
        :raises IndexError: if the binding site ligand is not found in the reference protein
        """
        # makedirs(exist_ok=True) also creates missing parent directories and
        # avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.args.output_directory, exist_ok=True)

        # Structures whose alignment value is not below this are ignored.
        rms_cutoff = 1

        # NOTE(review): this scratch directory is never removed here.
        tmp = tempfile.mkdtemp()

        print("Searching the PDB")
        pdbs, entities = pdb_search(self.args.uniprot,
                                    self.args.max_resolution)

        download(pdbs, out_dir=tmp, processes=self.args.processes)

        if self.args.ref_pdb not in pdbs:
            print("Downloading reference data")
            download([self.args.ref_pdb], out_dir=tmp, processes=1)

        # chain to entity: the entity passed on is the position of the
        # reference chain among the chains of the reference protein
        ref_prot = Protein.from_file(
            os.path.join(tmp, f"{self.args.ref_pdb}.pdb"))
        entity = [c.identifier
                  for c in ref_prot.chains].index(self.args.ref_chain)
        prepare_protein(pdb=self.args.ref_pdb, entity=entity, out_dir=tmp)

        print("Prepare protein")
        # Successfully prepared (pdb, entity) pairs are collected in a new list.
        # Removing failures from `pdbs`/`entities` while iterating over them
        # would skip the element following each failure, and removing an entity
        # by value could drop the wrong position.
        prepared = []
        for pdb, entity in tqdm(zip(pdbs, entities), total=len(pdbs)):
            # other chains removed, max number chains = 1
            try:
                prepare_protein(pdb=pdb, entity=entity, out_dir=tmp)
            except Exception:
                # Best effort: report and drop the entry, but let
                # KeyboardInterrupt / SystemExit propagate.
                print("ERROR", pdb, entity)
            else:
                prepared.append((pdb, entity))
        pdbs = [pdb for pdb, _ in prepared]
        entities = [entity for _, entity in prepared]

        print("Aligning PDBs")
        # align: one argument tuple per remaining pdb
        args = zip([self.args.ref_pdb] * len(pdbs),
                   [self.args.binding_site_ligand] * len(pdbs), pdbs,
                   [tmp] * len(pdbs))

        with Pool(processes=self.args.processes) as pool:
            rms_list = list(tqdm(pool.imap(align, args), total=len(pdbs)))

        print("Extracting Ligands...")
        all_ligands = []
        # add the reference structure back to PDB list (with a value of 0 so it
        # always passes the cutoff)
        for pdb, rms in zip(pdbs + [self.args.ref_pdb], rms_list + [0]):
            if rms < rms_cutoff:
                all_ligands.extend(ligands_from_pdb(pdb, tmp))
        print(f"    {len(all_ligands)} detected")

        if self.args.binding_site_ligand:
            # Re-read the reference file after the preparation step above.
            ref_prot = Protein.from_file(
                os.path.join(tmp, f"{self.args.ref_pdb}.pdb"))
            ref_mol = [
                lig for lig in ref_prot.ligands if lig.identifier ==
                f"{self.args.ref_chain}:{self.args.binding_site_ligand}"
            ][0]
            all_ligands = bs_filter(all_ligands, ref_mol)
            print(
                f"    Binding site filter, {len(all_ligands)} ligands remaining"
            )

        if self.args.no_solvents:
            all_ligands = remove_solvents(all_ligands)
            print(
                f"    Removed solvents and metals, {len(all_ligands)} ligands remaining"
            )

        if self.args.cluster:
            all_ligands = cluster_ligands(all_ligands)
            print(
                f"    Clustered ligands, {len(all_ligands)} ligands remaining")

        # Split every ligand identifier once. The slicing assumes the form
        # "<pdb>:<chain>:<hetid><resnum>" with a three character het id --
        # TODO confirm against ligands_from_pdb.
        rows = []
        for lig in all_ligands:
            parts = lig.identifier.split(":")
            rows.append((parts[0], parts[1], parts[2][:3], parts[2][3:]))

        # zip(*rows) cannot be unpacked into four names when no ligand is left,
        # so fall back to empty columns and still write (empty) output files.
        # Note that `pdbs` is rebound to the per-ligand pdb column here.
        if rows:
            pdbs, chains, hetids, resnum = zip(*rows)
        else:
            pdbs, chains, hetids, resnum = (), (), (), ()

        df = pd.DataFrame({
            "pdbs": pdbs,
            "chains": chains,
            "hetids": hetids,
            "resnum": resnum
        })
        df.to_csv(
            os.path.join(self.args.output_directory, "ligand_overlay.csv"))

        with io.MoleculeWriter(
                os.path.join(self.args.output_directory,
                             "ligand_overlay.mol2")) as w:
            for lig in all_ligands:
                w.write(lig)