def align(inputs):
    """Superpose a structure onto a reference chain and rewrite it in place.

    :param tuple inputs: ``(ref_pdb, ref_mol, other_pdb, input_dir)``.
        ``ref_pdb`` and ``other_pdb`` are the stems of ``<stem>.pdb`` files in
        ``input_dir``. ``ref_mol`` is compared with the second ``:``-separated
        field of each reference ligand identifier; when it is falsy no binding
        site is used for the superposition.
    :return: the RMS value returned by the superposition, or the sentinel
        ``999`` when any step fails.
    """
    try:
        ref_pdb, ref_mol, other_pdb, input_dir = inputs
        ref = Protein.from_file(os.path.join(input_dir, f"{ref_pdb}.pdb"))
        other_path = os.path.join(input_dir, f"{other_pdb}.pdb")
        other = Protein.from_file(other_path)

        if ref_mol:
            ref_mol_obj = [
                lig for lig in ref.ligands
                if lig.identifier.split(":")[1] == ref_mol
            ][0]
            ref_bind_site = Protein.BindingSiteFromMolecule(
                protein=ref, molecule=ref_mol_obj, distance=12)
        else:
            ref_bind_site = None

        chain_superposition = Protein.ChainSuperposition()
        # other chains already stripped, so only the first chain is compared
        rms, _ = chain_superposition.superpose(ref.chains[0],
                                               other.chains[0],
                                               binding_site1=ref_bind_site)

        # the input file is overwritten with the structure after superposition
        # (presumably superpose() moves `other` in place - confirm in CCDC docs)
        with io.MoleculeWriter(other_path) as w:
            w.write(other)
        return rms
    except Exception:
        # Best-effort batch worker: report failure through the sentinel value.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
        return 999
def __init__(self):
    """Parse the command line, fetch and align both structures, score the poses.

    Downloads the docked protein and the reference into a temporary directory,
    aligns the reference with ``self._align``, writes the reference ligand to
    ``reference.mol2`` next to this script, and stores one ``self.rmsd`` value
    per molecule read from the results file in ``self.rmsd_values``.
    """
    # NOTE(review): super(self.__class__, self) recurses forever if this class
    # is ever subclassed; left untouched here.
    super(self.__class__, self).__init__(description=__doc__)

    # command line interface
    self.add_argument('protein',
                      help='pdb_code of protein which was used in docking')
    self.add_argument('reference', help='pdb_code of reference')
    self.add_argument('chemical_id',
                      help='PDB identifier for the docked ligand')
    self.add_argument('results', help='path to results files')
    self.add_argument('-r', '--chain_ref', default='A',
                      help='Chain to used for alignment')
    self.add_argument('-p', '--chain_protein', default='A',
                      help='Chain to used for alignment')
    self.args = self.parse_args()

    self.tmp = tempfile.mkdtemp()

    # docked protein: download, load, protonate
    PDBResult(self.args.protein).download(self.tmp)
    protein_file = os.path.join(self.tmp, self.args.protein + ".pdb")
    self.protein = Protein.from_file(protein_file)
    self.protein.add_hydrogens()

    # reference: download, load, protonate, then align to the docked protein
    PDBResult(self.args.reference).download(self.tmp)
    reference_file = os.path.join(self.tmp, self.args.reference + ".pdb")
    reference = Protein.from_file(reference_file)
    reference.add_hydrogens()
    self.ref = self._align(self.protein, reference)

    # first matching ligand of the aligned reference is the comparison pose
    matches = self._extract_ligands(protein=self.ref,
                                    ligand=self.args.chemical_id,
                                    chain=self.args.chain_ref)
    self.reference_ligand = matches[0]

    # both the reference ligand and the results are resolved next to this file
    script_dir = os.path.dirname(os.path.realpath(__file__))
    with MoleculeWriter(os.path.join(script_dir, "reference.mol2")) as writer:
        writer.write(self.reference_ligand)
    self.results = MoleculeReader(os.path.join(script_dir, self.args.results))

    self.rmsd_values = []
    for pose in self.results:
        self.rmsd_values.append(self.rmsd(pose, self.reference_ligand))
def _align_proteins(reference, reference_chain, targets):
    """
    align proteins by chain

    Each target is re-read from ``t.fname``. The chain used for superposition
    is taken from the first residue of the 6 A binding site around the first
    ligand whose three-letter code equals ``t.clustered_ligand``.

    :param `ccdc.protein.Protein` reference: align to me (re-read from ``reference.fname``)
    :param str reference_chain: align to this chain
    :param list targets: objects exposing ``fname``, ``clustered_ligand`` and ``identifier``
    :return tup: list(:class:`ccdc.protein.Protein`) and list (:class:`ccdc.molecule.Molecule`)
    """
    print("Aligning proteins to {}, chain {}...".format(
        reference.identifier, reference_chain))
    aligned_prots = []
    aligned_ligands = []

    reference = Protein.from_file(reference.fname)
    reference.add_hydrogens()

    for t in tqdm(targets):
        prot = Protein.from_file(t.fname)
        prot.detect_ligand_bonds()
        prot.add_hydrogens()
        hetid = str(t.clustered_ligand)

        # Reset per target: previously `chain` was unbound for the first target
        # without a matching ligand and silently reused from the previous
        # target otherwise.
        chain = None
        for l in prot.ligands:
            if hetid != str(l.identifier.split(":")[1][0:3]):
                continue
            try:
                bs = Protein.BindingSiteFromMolecule(protein=prot,
                                                     molecule=l,
                                                     distance=6)
                chain = bs.residues[0].identifier.split(":")[0]
            except Exception:
                # binding site could not be built; reported just below
                chain = None
            # only the first matching ligand is ever considered
            break

        if not chain:
            print("\n {} failed! No chain detected".format(t.identifier))
            # NOTE(review): this aborts all remaining targets (original
            # behaviour kept); `continue` may have been intended - confirm.
            break

        try:
            binding_site_superposition = Protein.ChainSuperposition()
            binding_site_superposition.superpose(reference[reference_chain],
                                                 prot[chain])
            aligned_prots.append(prot)
            # keep the matching ligands that sit on the superposed chain
            for lig in prot.ligands:
                fields = lig.identifier.split(":")
                if hetid == str(fields[1][0:3]) and chain == str(fields[0]):
                    aligned_ligands.append(lig)
        except IndexError:
            print("\n {} failed!".format(t.identifier))
            continue

    return aligned_prots, aligned_ligands
def generate_hotspot(self):
    """Build a hotspot result from pre-computed grids or from the protein.

    Looks in ``self.in_dir`` for ``.grd`` and ``.pdb`` files. When more than
    one such file is present the probe grids are loaded from ``<probe>.grd``
    (charged probes included only when ``self.charged`` is truthy). Otherwise
    the calculation is run from ``protein.pdb``.

    :raises ValueError: if no file name containing ``protein.pdb`` is found
    :return: whatever ``Hotspots.from_grid_dic`` / ``Hotspots.from_protein`` returns
    """
    # generate HS from grd or from protein
    print("Run Fragment Hotspots...")
    h = Hotspots()

    inputs = [f for f in listdir(self.in_dir)
              if f.endswith((".grd", ".pdb"))]
    if not any("protein.pdb" in p for p in inputs):
        raise ValueError("No protein file found")

    # The original compared the list itself with 1 (`inputs > 1`), which is a
    # TypeError on Python 3 and always true on Python 2.
    if len(inputs) > 1:
        probes = ["apolar", "donor", "acceptor"]
        if self.charged:
            probes += ["negative", "positive"]
        super_grids = {
            probe: Grid.from_file(
                os.path.join(self.in_dir, "{}.grd".format(probe)))
            for probe in probes
        }
        return h.from_grid_dic(super_grids=super_grids,
                               prot=Protein.from_file(self.protein))

    # superstar isn't currently working on windows
    return h.from_protein(prot=os.path.join(self.in_dir, "protein.pdb"),
                          charged_probes=self.charged,
                          experimental_buriedness=True)
def run(self, cavity=True):
    """Run the fragment hotspot calculation on the protein and write the result.

    :param bool cavity: when exactly ``True``, cavities are detected from
        ``self.args.prot_fname`` and passed to the calculation
    """
    runner = Runner()
    run_settings = Runner.Settings(sphere_maps=False)

    # either run the default preparation or load the file untouched
    if self.args.prepare is not True:
        self.prot = Protein.from_file(self.args.prot_fname)
    else:
        self.prepare_protein()

    detected = None
    if cavity is True:
        detected = Cavity.from_pdb_file(self.args.prot_fname)
        print(detected)

    hotspot_result = runner.from_protein(
        protein=self.prot,
        charged_probes=False,
        buriedness_method=self.args.buriedness_method,
        cavities=detected,
        nprocesses=5,
        settings=run_settings,
    )

    with HotspotWriter(path=self.in_dir,
                       zip_results=self.args.zipped) as out:
        out.write(hotspot_result)
def extracted_ligands(fnames, pdb_codes, cavities):
    """get ligands

    Reads every structure in ``fnames``, strips waters and metals, and sorts
    each ligand with more than 5 atoms whose het code is not in
    ``excluded_hetids`` into every cavity that ``contains`` its centroid.
    Accepted ligands get hydrogens and are relabelled ``"<PDB> (<HET>)"``
    using ``pdb_codes[i]`` when that index exists.

    :param list fnames: structure file paths
    :param list pdb_codes: PDB codes, indexed in parallel with ``fnames``
    :param list cavities: cavity objects accepted by ``contains``
    :return dict: cavity index -> list of ligands
    """
    ligands_by_cavtiy = {ident: [] for ident in range(len(cavities))}

    for i, fname in enumerate(fnames):
        print("{} / {}".format(i, len(fnames)))
        prot = Protein.from_file(fname)
        prot.remove_all_waters()
        prot.remove_all_metals()
        prot.detect_ligand_bonds()

        for ligand in prot.ligands:
            # Derive the het code once, before the identifier is rewritten;
            # the original recomputed it per cavity from the relabelled name.
            try:
                het = ligand.identifier.split(":")[1][0:3]
            except IndexError:
                het = ligand.identifier
            if len(ligand.atoms) <= 5 or het in excluded_hetids:
                continue

            centroid = ligand.centre_of_geometry()
            prepared = False
            for j, cavity in enumerate(cavities):
                if not contains(cavity, centroid):
                    continue
                if not prepared:
                    # protonate and relabel only once per ligand
                    print(het)
                    ligand.add_hydrogens()
                    try:
                        ligand.identifier = "{0} ({1})".format(
                            pdb_codes[i].upper(), het)
                    except IndexError:
                        # no PDB code for this file: keep the identifier as is
                        pass
                    print(ligand.identifier)
                    prepared = True
                ligands_by_cavtiy[j].append(ligand)

    return ligands_by_cavtiy
def generate_layer(self, iterations=2, thickness=10):
    """Map the masked grid onto successive shells grown from the fragment.

    The first shell is the sphere grid built around the first molecule in
    ``self.fragment``. Every later shell is the difference between the previous
    boundary dilated ``thickness`` times and that previous boundary.

    :param int iterations: number of shells to generate
    :param int thickness: number of dilation steps per additional shell
    :return dict: shell number (as a string, starting at "1") -> mapped result
    """
    layers = {}
    protein = Protein.from_file(self.protein)
    masked = self.bcv_result._single_grid()

    for step in range(1, iterations + 1):
        if step == 1:
            # initial layer
            shell = self._get_sphere_grid(
                template=self.bcv_result.super_grids["apolar"].copy(),
                molecule=io.MoleculeReader(self.fragment)[0])
            self.inner = shell
        else:
            # one dilation up front plus (thickness - 1) in the loop
            self.outer = self.inner.dilate()
            for _ in range(1, thickness):
                self.outer = self.outer.dilate()
            shell = self.outer - self.inner
            self.inner = self.outer

        layers[str(step)] = self._diff_to_map(diff=shell,
                                              map=masked,
                                              prot=protein)
    return layers
def run(self):
    """Run a ligand-focused hotspot calculation and write zipped grids.

    The protein comes from the task input, and the first molecule of
    ``ligands/<pdb>.sdf`` is passed as the ``cavities`` argument.
    """
    protein = Protein.from_file(self.input().path)
    ligand = io.MoleculeReader('ligands/{}.sdf'.format(self.pdb))[0]

    runner = Runner()
    calc_settings = runner.Settings()
    overrides = (
        ("apolar_translation_threshold", 15),
        ("polar_translation_threshold", 15),
        ("polar_contributions", False),
        ("sphere_maps", True),
        ("nrotations", 3000),
    )
    for attribute, value in overrides:
        setattr(calc_settings, attribute, value)

    hotspot_result = runner.from_protein(protein,
                                         buriedness_method='ghecom',
                                         nprocesses=1,
                                         settings=calc_settings,
                                         cavities=ligand)

    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False
    target_dir = os.path.dirname(self.output().path)
    writer = HotspotWriter(target_dir,
                           grid_extension=".grd",
                           zip_results=True,
                           settings=writer_settings)
    writer.write(hotspot_result)
def run(self):
    """Build the reference pharmacophore and write all of its outputs.

    Outputs written, in order: the PyMOL file, the zipped grids (with the
    downloaded PDB entry as protein), the aligned ligands, and a pickle of the
    model's comparison points.
    """
    # pharmacophore model from the PDB entry, limited to the ranked features
    model = PharmacophoreModel.from_pdb(pdb_code=self.pdb,
                                        chain=self.chain,
                                        representatives=self.input().path,
                                        identifier=self.pdb)
    model.rank_features(max_features=6, feature_threshold=5)

    # PyMOL visualisation
    model.write(self.output()["pymol"].path)

    # Results object: downloaded protein plus the model grids
    download_dir = tempfile.mkdtemp()
    PDBResult(self.pdb).download(download_dir)
    pdb_file = os.path.join(download_dir, "{}.pdb".format(self.pdb))
    result = Results(protein=Protein.from_file(pdb_file),
                     super_grids=model.dic)

    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False
    grid_dir = os.path.dirname(self.output()["grids"].path)
    with HotspotWriter(grid_dir,
                       grid_extension=".grd",
                       zip_results=True,
                       settings=writer_settings) as grid_writer:
        grid_writer.write(result)

    # aligned molecules
    with MoleculeWriter(self.output()['aligned_mols'].path) as mol_writer:
        for ligand in model.aligned_ligands:
            mol_writer.write(ligand)

    # comparison points
    comparison_points = model._comparision_dict()
    with open(self.output()['points'].path, 'wb') as handle:
        pickle.dump(comparison_points, handle)
def load_reference_protein(self):
    """Fetch the reference PDB entry, load it and report which chain is used.

    :return: the protein read from ``self.reference_pdb_file``
    """
    self.fetch_pdb(self.reference_id, self.reference_pdb_file)
    protein = Protein.from_file(self.reference_pdb_file)
    message = 'Reference protein {} chain {} '.format(
        protein.identifier, self.reference_chain_id)
    print(message)
    return protein
def hot_calc(inputs):
    """Run a hotspot calculation for one PDB/ligand pair and write both results.

    :param tuple inputs: ``(pdb, het, pdir)``; reads ``<pdir>/<pdb>.pdb`` and
        the first molecule of ``<pdir>/<pdb>_<het>.mol2``

    Writes the full hotspot result ("hotspot") and the smoothed 250-volume
    extraction ("bestvol") to ``pdir``.
    """
    pdb, het, pdir = inputs
    protein = Protein.from_file(os.path.join(pdir, f"{pdb}.pdb"))
    ligand = MoleculeReader(os.path.join(pdir, f"{pdb}_{het}.mol2"))[0]

    hotspot = Runner().from_protein(protein, nprocesses=3, cavities=ligand)
    for probe in list(hotspot.super_grids):
        grid = hotspot.super_grids[probe]
        hotspot.super_grids[probe] = grid.max_value_of_neighbours()

    # (alternative kept from the original: read a previously written
    #  "hotspot" result from <pdir>/out.zip with HotspotReader instead)

    best_volume = Extractor(hotspot).extract_volume(volume=250)

    # smoothing
    for probe in list(best_volume.super_grids):
        grid = best_volume.super_grids[probe]
        best_volume.super_grids[probe] = grid.gaussian(sigma=0.5)

    best_volume.identifier = "bestvol"
    hotspot.identifier = "hotspot"

    with HotspotWriter(pdir) as writer:
        writer.write([hotspot, best_volume])
def get_ensemble(self, nrotations, charged=False):
    """Run the hotspot calculation for every protein and list the result zips.

    :param nrotations: forwarded to the runner and used in the output folder name
    :param bool charged: forwarded to the runner
    :return list: ``<protein dir>/fullsize_hotspots_<nrotations>/out.zip`` for
        every path returned by ``self.get_protein_paths()``
    """
    largest_ligand_path = self.find_largest_ligand()

    # NOTE(review): the ligand, protein and binding site loaded here are never
    # used afterwards; the calls are kept because they can raise on bad input.
    ligand = MoleculeReader(largest_ligand_path)[0]
    protein = Protein.from_file(
        join(dirname(largest_ligand_path), 'protein.mol2'))
    Protein.BindingSiteFromMolecule(protein=protein,
                                    molecule=ligand,
                                    distance=6.5)

    # (earlier variant globbed <root_dir>/*/protein.mol2 instead)
    protein_paths = self.get_protein_paths()
    print(protein_paths)
    print(self.ensemble_name, len(protein_paths))

    # (earlier variant built the task without the data_source argument)
    task = ParalleliselRunner(protein_paths, nrotations, charged,
                              data_source='KLIFS')
    luigi.build([task], local_scheduler=True, workers=30)

    result_folder = "fullsize_hotspots_{}".format(nrotations)
    hot_paths = []
    for protein_path in protein_paths:
        hot_paths.append(join(dirname(protein_path), result_folder, "out.zip"))
    return hot_paths
def __init__(self, path):
    """Index a hotspot result directory (or zip) and load its protein.

    :param str path: a results directory, or a ``.zip`` which is resolved to
        a directory by ``self._path_from_zip()``
    :raises IndexError: if no usable ``.pdb`` file is found
    """
    self._supported_interactions = [
        "apolar", "donor", "acceptor", "positive", "negative"
    ]
    self._supported_grids = [".grd", ".ccp4", ".acnt", ".dat"]
    self._not_hs_dir = ["best_islands", "peaks", "ins"]

    self._path = path
    if splitext(self._path)[1] == ".zip":
        self._base = self._path_from_zip()
    else:
        self._base = path

    self._files = listdir(self._base)

    # The original filter (`f != "" or f != ".py"`) was always true and tested
    # the file name; the extension is what is meant to be excluded.
    found_extensions = (splitext(f)[1] for f in self._files)
    self._extensions = set(
        e for e in found_extensions if e not in ("", ".py"))

    pfiles = [f for f in self._files if splitext(f)[1] == ".pdb"]
    if len(pfiles) > 1:
        print("WARNING! {} has been used as default protein".format(
            join(self._base, "protein.pdb")))
        # The original compared the stale name `f` here (NameError on Python 3).
        pfiles = [p for p in self._files if p == "protein.pdb"]

    if not pfiles:
        # same exception type as the former bare pfiles[0], clearer message
        raise IndexError("No protein file found in {}".format(self._base))

    self.protein = Protein.from_file(join(self._base, pfiles[0]))
    self.hs_dir = [
        d for d in self._files
        if isdir(join(self._base, d)) and d not in self._not_hs_dir
    ]
def run(self):
    """Write every ligand of the input protein except those labelled HEM.

    Ligand bonds are detected and hydrogens added before the ligands are
    written to the task output.
    """
    protein = Protein.from_file(self.input().path)
    protein.detect_ligand_bonds()
    protein.add_hydrogens()

    with io.MoleculeWriter(self.output().path) as writer:
        for ligand in protein.ligands:
            if 'HEM' in ligand.identifier:
                continue
            writer.write(ligand)
def test_get_ligand(self):
    """``_get_ligand`` returns a ``Molecule`` for every example, both sources."""
    for source in ("file", "pdb"):
        data_dir = f"testdata/wrapper_arpeggio/prepare/{source}"
        for pdb, hetid in self.examples.items():
            structure = Protein.from_file(
                os.path.join(data_dir, f"{pdb}_clean.pdb"))
            extracted = _get_ligand(structure, hetid, "A")
            self.assertTrue(isinstance(extracted, Molecule))
def _generate_result(self, path):
    """Assemble a ``Results`` object from the files in one result directory.

    :param str path: directory holding the protein file and the grids
    :raises IndexError: if no supported protein file is found, or if zero or
        more than one supported grid extension is present
    :raises NotImplementedError: if the only grid extension is ``dat``
    :return: ``Results`` named after ``basename(path)``
    """
    def extension(fname):
        # second "."-separated field, or "" for names without a "."
        # (the original indexed [1] directly and crashed on such names)
        parts = fname.split(".")
        return parts[1] if len(parts) > 1 else ""

    # accept either a single extension string or a collection of them
    protein_extensions = self.supported_protein_extensions
    if isinstance(protein_extensions, str):
        protein_extensions = {protein_extensions}

    with PushDir(path):
        files = set(listdir(path))

        # fetch protein - this should always be protein.pdb
        prot_names = [f for f in files if extension(f) in protein_extensions]
        if not prot_names:
            raise IndexError("No supported protein file found")
        prot_name = prot_names[0]
        prot = Protein.from_file(prot_name)
        files.remove(prot_name)

        # there should only be one grid extension in the directory, if there
        # are more then you can manually read in your results
        grid_extension = {extension(f) for f in files}.intersection(
            self.supported_grid_extensions)
        if len(grid_extension) > 1:
            raise IndexError("Too many grid types, create `hotspots.result.Results` manually")
        elif len(grid_extension) < 1:
            raise IndexError("No supported grid types found")
        elif list(grid_extension)[0] == "dat":
            raise NotImplementedError("Will put this in if requested")
        else:
            grid_extension = list(grid_extension)[0]

        # read hotspot grids
        stripped_files = {f.split(".")[0] for f in files}
        hotspot_grids = stripped_files.intersection(self.supported_interactions)
        super_grids = {p: Grid.from_file(f"{p}.{grid_extension}")
                       for p in hotspot_grids}

        # read superstar grids
        # any() replaces len([bool, ...]) > 0, which was true for any file at all
        if self.read_superstar and any(f.startswith("superstar") for f in files):
            superstar_grids = {
                p: Grid.from_file(f"superstar_{p}.{grid_extension}")
                for p in hotspot_grids
            }
        else:
            superstar_grids = None

        # read weighted_superstar grids
        if self.read_weighted and any(f.startswith("weighted") for f in files):
            weighted_grids = {
                p: Grid.from_file(f"weighted_{p}.{grid_extension}")
                for p in hotspot_grids
            }
        else:
            weighted_grids = None

        # fetch buriedness grid
        buriedness_name = next(
            (f for f in files if f.startswith("buriedness")), None)
        if buriedness_name and self.read_buriedness:
            buriedness = Grid.from_file(buriedness_name)
        else:
            buriedness = None

        return Results(super_grids=super_grids,
                       protein=prot,
                       buriedness=buriedness,
                       superstar=superstar_grids,
                       weighted_superstar=weighted_grids,
                       identifier=basename(path))
def __init__(self, data, out_dir):
    """Write ``data`` to ``out_dir`` and load the files that were written.

    ``self._write()`` is expected to fill ``self.files``; the first protein
    path is loaded and one reader is opened per ligand path.
    """
    self.data = data
    self.files = dict(protein=[], ligands=[])
    self.out_dir = out_dir
    self._write()

    protein_path = self.files['protein'][0]
    self._protein = Protein.from_file(protein_path)
    # NOTE(review): this stores the reader objects, not the molecules in them
    self._ligands = list(map(MoleculeReader, self.files['ligands']))
def prepare_protein(self):
    """default protein preparation settings on the protein

    Loads ``self.args.prot_fname`` into ``self.prot``, adds hydrogens, removes
    all ligands and metals, and removes waters when ``self.args.remove_waters``
    is set.
    """
    protein = Protein.from_file(self.args.prot_fname)
    self.prot = protein

    protein.add_hydrogens()
    for ligand in protein.ligands:
        protein.remove_ligand(ligand.identifier)
    protein.remove_all_metals()

    if not self.args.remove_waters:
        return
    protein.remove_all_waters()
def load_target(self, target_pdb_id):
    '''Load target protein chain

    The entry is fetched to ``<output_dir>/pdb<target_pdb_id>.pdb`` with
    ``self.fetch_pdb`` and then read from that file.

    :param target_pdb_id: The pdb identifier of the target
    :return: the protein read from the fetched file
    '''
    # Format only the file name: the original formatted the whole joined path,
    # so braces in output_dir raised or were substituted.
    target_pdb_file = os.path.join(self.output_dir,
                                   'pdb{}.pdb'.format(target_pdb_id))
    self.fetch_pdb(target_pdb_id, target_pdb_file)
    target_protein = Protein.from_file(target_pdb_file)
    return target_protein
def run(self):
    """Score the protein atoms with the hotspot result and dump them as CSV.

    One ``protein,<label>,<partial_charge>`` row is written per heavy atom of
    the scored protein, after a ``mol_id,atom_id,score`` header.
    """
    protein = Protein.from_file(self.input()['protein'].path)
    hotspot_result = HotspotReader(self.input()['hs_result'].path).read()
    scored = hotspot_result.score(protein)

    row_template = "protein,{},{}\n"
    with open(self.output().path, 'w') as csv_file:
        csv_file.write("mol_id,atom_id,score\n")
        for atom in scored.heavy_atoms:
            # the value is read from the partial_charge attribute of the atom
            csv_file.write(row_template.format(atom.label,
                                               atom.partial_charge))
def prepare_protein(prot_path, ch=("A", "B")):
    """Load a protein and reduce it to the requested, protonated chains.

    Every chain identifier is printed. Chains not listed in ``ch`` are removed,
    then waters, ligands and metals are stripped and hydrogens added.

    :param str prot_path: path of the structure file
    :param ch: chain identifiers to keep (immutable default, so the default
        cannot be altered between calls)
    :return: the prepared protein
    """
    prot = Protein.from_file(prot_path)

    for chain in prot.chains:
        print(chain.identifier)
        if chain.identifier not in ch:
            prot.remove_chain(chain.identifier)

    prot.remove_all_waters()
    for ligand in prot.ligands:
        prot.remove_ligand(ligand.identifier)
    prot.remove_all_metals()
    prot.add_hydrogens()
    return prot
def prepare_protein(name):
    """Clean ``<name>/<name>_prot/<name>_p.pdb`` and save it as ``..._goldp.pdb``.

    Waters and unknown atoms are removed and hydrogens added before writing.
    """
    parts = (name, name, name)
    # (a .mol2 input with the same stem was used in an earlier variant)
    source_path = '%s/%s_prot/%s_p.pdb' % parts
    target_path = '%s/%s_prot/%s_goldp.pdb' % parts

    protein = Protein.from_file(source_path)
    protein.remove_all_waters()
    protein.remove_unknown_atoms()
    protein.add_hydrogens()

    with MoleculeWriter(target_path) as writer:
        writer.write(protein)
def prepare_protein(self):
    """Load ``self.in_file`` and strip waters, ligands and metals.

    Hydrogens are added only when ``self.protonated`` is falsy.

    :return: the prepared protein
    """
    resolved_path = str(self.in_file.resolve())
    protein = Protein.from_file(resolved_path)

    protein.remove_all_waters()
    protein.detect_ligand_bonds()
    for bound in protein.ligands:
        protein.remove_ligand(bound.identifier)
    protein.remove_all_metals()

    if self.protonated:
        return protein
    protein.add_hydrogens()
    return protein
def get_largest_binding_site(self):
    """
    Returns the binding site created within 6.5A of the largest ligand

    The protein file name is the ligand file name with "sdf" replaced by
    "pdb", looked up in ``self.pdb_dir``.

    :return: the binding site object
    """
    ligand_name = self.find_largest_ligand()
    ligand_path = join(self.lig_dir, ligand_name)
    protein_path = join(self.pdb_dir, ligand_name.replace("sdf", "pdb"))

    ligand = MoleculeReader(ligand_path)[0]
    protein = Protein.from_file(protein_path)
    return Protein.BindingSiteFromMolecule(protein=protein,
                                           molecule=ligand,
                                           distance=6.5)
def __init__(self, path, hs_pdb, ligand_pdb, ligand_identifier):
    """Run the whole pipeline at construction time.

    Steps, in order: download and prepare the hotspot protein, read the
    hotspot result from ``<path>/out.zip``, download the ligand-bearing
    structure and align it to the hotspot protein on chain "A", extract the
    ligand, search for related ligands, dock them and rescore them.

    :param path: directory containing ``out.zip``
    :param hs_pdb: PDB code of the structure the hotspot result belongs to
    :param ligand_pdb: PDB code of the structure carrying the ligand
    :param ligand_identifier: stored on the instance; not used directly here
    """
    self.path = path
    self.hs_pdb = hs_pdb
    self.ligand_pdb = ligand_pdb
    self.ligand_identifier = ligand_identifier
    # scratch directory for the downloaded PDB files (not removed here)
    self.temp = tempfile.mkdtemp()

    # download PDB file using PDB python API
    self.hs_pdb_info = PDBResult(identifier=self.hs_pdb)
    self.hs_pdb_info.download(out_dir=self.temp)
    self._hs_fname = os.path.join(self.temp, hs_pdb + ".pdb")

    # calculate hotspot using Hotspots API
    # (the calculation itself is disabled below; a stored result is read instead)
    self.protein = self._prepare_protein(Protein.from_file(self._hs_fname))
    # self.hr = self.calc_hr()
    # with hs_io.HotspotWriter(path=os.path.join(self.path), zip_results=True) as hw:
    #     hw.write(self.hr)
    self.hr = hs_io.HotspotReader(path=os.path.join(self.path, "out.zip")).read()

    # download ligand PDB file using PDB python API
    self.ligand_pdb_info = PDBResult(identifier=self.ligand_pdb)
    self.ligand_pdb_info.download(out_dir=self.temp)
    self._ligand_fname = os.path.join(self.temp, ligand_pdb + ".pdb")

    # align (ligands are kept on the target so one can be extracted afterwards)
    target = self._prepare_protein(Protein.from_file(self._ligand_fname), remove_ligands=False)
    self.ligand_protein = self.align(self.protein, "A", target, "A")
    self.ligand = self.extract_ligand()

    # substructure search of the PDB
    # NOTE(review): the method called is named similarity_search - confirm which applies
    self.search_ligands = self.similarity_search()

    # dock search ligands into hotspot protein
    self.docked_ligands = self.dock()
    self.rescored_ligands = self.rescore()
def generate_fake(self, buriedness=False, weighted=False, superstar=True):
    """
    create a small set of grids for testing

    :param buriedness: when truthy, attach a grid built from the test molecule
    :param weighted: when truthy, attach fake weighted superstar grids
    :param superstar: when truthy, attach fake superstar grids
    :return: a ``Results`` object with apolar, donor and acceptor grids
    """

    def populate_grid(template, num_spheres, radius=1, value=8, scaling='linear'):
        # Random spheres on an empty copy of the template.
        # NOTE(review): range(1, num_spheres) places num_spheres - 1 spheres.
        grid = template.copy_and_clear()
        for _ in range(1, num_spheres):
            x, y, z = [
                np.random.randint(low=2, high=steps - 2, size=1)
                for steps in grid.nsteps
            ]
            centre = grid.indices_to_point(x, y, z)
            grid.set_sphere(point=centre, radius=radius, value=value,
                            scaling=scaling)
        return grid

    protein = Protein.from_file("testdata/6y2g_A/binding_site.pdb")
    mol = MoleculeReader("testdata/6y2g_A/A_mol.mol2")[0]
    template = Grid.initalise_grid([atom.coordinates for atom in mol.atoms])

    buriedness_grid = Grid.from_molecule(mol) if buriedness else None

    interactions = ["apolar", "donor", "acceptor"]

    def fake_grid_set():
        # one freshly randomised grid per interaction type
        return {
            probe: populate_grid(template=template, num_spheres=3)
            for probe in interactions
        }

    # generation order (hotspot, superstar, weighted) is kept so that a seeded
    # random state produces the same grids as before
    super_grids = fake_grid_set()
    superstar_grids = fake_grid_set() if superstar else None
    weighted_superstar_grids = fake_grid_set() if weighted else None

    return Results(super_grids=super_grids,
                   protein=protein,
                   buriedness=buriedness_grid,
                   superstar=superstar_grids,
                   weighted_superstar=weighted_superstar_grids)
def run_hotspot_calculation(self,
                            nrot=100000,
                            method="ghecom",
                            charged=True,
                            sphere_maps=False,
                            save_ligand=True):
    """
    Runs the hotspots calculation on the specified PDB structure

    :param int nrot: number of rotations, also passed to ``self.log_runner``
    :param str method: buriedness method
    :param bool charged: whether charged probes are used
    :param bool sphere_maps: forwarded to the runner settings
    :param bool save_ligand: when truthy, ``self.extract_ligands()`` is called
    :return: None; results are written to ``self.out_dir``
    """
    if not self.out_dir:
        self.out_dir = self.make_savedir()

    # a known protein path is loaded as is; otherwise it is located and prepared
    if self.protein_path:
        protein = Protein.from_file(self.protein_path)
    else:
        self.protein_path = self.find_protein()
        protein = self.prepare_protein()

    if save_ligand:
        self.extract_ligands()

    # log the run parameters
    self.log_runner(nrot)

    runner = Runner()
    run_settings = runner.Settings()
    run_settings.nrotations = nrot
    run_settings.apolar_translation_threshold = 15
    run_settings.polar_translation_threshold = 15
    run_settings.sphere_maps = sphere_maps

    result = runner.from_protein(protein=protein,
                                 charged_probes=charged,
                                 probe_size=7,
                                 buriedness_method=method,
                                 cavities=None,
                                 nprocesses=5,
                                 settings=run_settings)

    # Save and zip the SuperStar Grids:
    self._save_superstar_grids(runner)

    # Save and zip the Results
    with hs_io.HotspotWriter(self.out_dir,
                             visualisation="pymol",
                             grid_extension=".ccp4",
                             zip_results=True) as out:
        out.write(result)
def load_protein(self, row):
    """Load the protein described by one table row and keep only its chains.

    :param row: mapping with the keys "Filename", "Chains", "Ensemble ID", "ID"
    :return: the protein, identified as ``<Ensemble ID>_<ID>``
    """
    filename = row["Filename"]
    print("Getting protein {}".format(basename(filename)))
    protein = Protein.from_file(filename)

    # Remove all chains we don't care about
    print(row["Chains"])
    for chain in protein.chains:
        chain_id = chain.identifier
        if chain_id in row["Chains"]:
            continue
        protein.remove_chain(chain_id)

    # Get correct bonding for the ligand
    protein.detect_ligand_bonds()
    protein.identifier = "{}_{}".format(row["Ensemble ID"], row["ID"])
    return protein
def __init__(self, fname="protein.pdb", run_dir="/home/pcurran/src"):
    """Load and prepare the protein and set up the ghecom file locations.

    :param str fname: protein file; relative paths are made absolute
    :param str run_dir: value stored in ``self.run_dir``, presumably where the
        "ghecom" command lives (confirm). The default keeps the previously
        hard-coded location.
    """
    self.fname = fname if isabs(fname) else abspath(fname)

    self.prot = Protein.from_file(fname)
    self._protein_preparation()

    # ghecom output is placed next to the input structure
    self.out_dir = dirname(self.fname)
    self.ghecom_out = join(self.out_dir, "ghecom_out.pdb")
    self.ghecom_grid = self._initalise_grid()

    # formerly a hard-coded user path; now overridable by the caller
    self.run_dir = run_dir
def to_grid(target, pdb):
    """Turn the aligned reference ligands of one entry into pharmacophore grids.

    Reads ``reference_pharmacophore/aligned_mols.mol2`` and ``hs/<pdb>.pdb``
    under ``Z:/patel_set/<target>/<pdb>`` and writes zipped grids to
    ``reference_pharmacophore/grids``.
    """
    base_dir = "Z:/patel_set/{}/{}".format(target, pdb)
    pharmacophore_dir = join(base_dir, "reference_pharmacophore")

    ligands = MoleculeReader(join(pharmacophore_dir, "aligned_mols.mol2"))
    model = PharmacophoreModel.from_ligands(ligands=ligands, identifier="test")

    grids = model.dic
    protein = Protein.from_file(join(base_dir, "hs", "{}.pdb".format(pdb)))
    result = Results(super_grids=grids, protein=protein)

    grid_dir = Helper.get_out_dir(join(pharmacophore_dir, "grids"))

    writer_settings = HotspotWriter.Settings()
    writer_settings.isosurface_threshold = [2, 5, 10]

    with HotspotWriter(path=grid_dir, zip_results=True,
                       settings=writer_settings) as writer:
        writer.write(result)