def create_atomic_hotspots(self, superstar_grids_dir):
    """
    Build atomic hotspot results from SuperStar grids stored on disk.

    ``buriedness.ccp4`` is read once from the directory and handed to every
    result. A probe type is skipped when its ``superstar_<probe>.ccp4`` file
    is not present in the directory.

    :param superstar_grids_dir: path to where the superstar grids are stored
    :return: list of `_AtomicHotspotResult`, one per probe grid that was found
    """
    buriedness_file = Path(superstar_grids_dir, 'buriedness.ccp4').resolve()
    buriedness_grid = Grid.from_file(str(buriedness_file))

    results = []
    for probe in ('donor', 'acceptor', 'apolar', 'positive', 'negative'):
        probe_file = Path(superstar_grids_dir, f'superstar_{probe}.ccp4')
        if not probe_file.exists():
            # Nothing was saved for this probe type; move on to the next one.
            continue

        print(f" found grid for probe of type {probe}")
        probe_grid = Grid.from_file(str(probe_file.resolve()))
        results.append(
            _AtomicHotspotResult(identifier=probe,
                                 grid=probe_grid,
                                 buriedness=buriedness_grid))
    return results
def _generate_result(self, path):
    """
    Read one directory of hotspot output files into a `Results` object.

    The directory is expected to hold a protein file, one grid per supported
    interaction and, optionally, ``superstar_*``, ``weighted_*`` and
    ``buriedness*`` grids. The "extension" of a file is taken to be the text
    between its first and second dot; files without a dot are ignored when
    extensions are inspected.

    :param path: directory containing the hotspot files
    :return: `Results` built from the files that were found
    :raises IndexError: if no protein file is found, if no supported grid
        extension is found, or if more than one is found
    :raises NotImplementedError: if the only supported grid extension is "dat"
    """
    # All files are opened by bare name below, so the reads rely on PushDir
    # making `path` the working directory.
    # NOTE(review): `listdir(path)` runs inside PushDir - presumably `path` is
    # absolute; confirm against callers.
    with PushDir(path):
        files = set(listdir(path))

        # fetch protein - this should always be protein.pdb
        prot_name = [f for f in files
                     if "." in f and f.split(".")[1] == self.supported_protein_extensions][0]
        prot = Protein.from_file(prot_name)
        files.remove(prot_name)

        # there should only be one grid extension in the directory, if there are more
        # then you can manually read in your results
        found_extensions = {f.split(".")[1] for f in files if "." in f}
        grid_extension = found_extensions.intersection(self.supported_grid_extensions)
        if len(grid_extension) > 1:
            raise IndexError("Too many grid types, create `hotspots.result.Results` manually")
        elif len(grid_extension) < 1:
            raise IndexError("No supported grid types found")
        elif list(grid_extension)[0] == "dat":
            raise NotImplementedError("Will put this in if requested")
        else:
            grid_extension = list(grid_extension)[0]

        # read hotspot grids
        stripped_files = {f.split(".")[0] for f in files}
        hotspot_grids = stripped_files.intersection(self.supported_interactions)
        super_grids = {p: Grid.from_file(f"{p}.{grid_extension}") for p in hotspot_grids}

        # read superstar grids - only when at least one such file is present.
        # (The previous check measured the length of a list of booleans, which
        # is non-zero for any non-empty directory.)
        has_superstar = any(f.startswith("superstar") for f in files)
        if has_superstar and self.read_superstar:
            superstar_grids = {p: Grid.from_file(f"superstar_{p}.{grid_extension}")
                               for p in hotspot_grids}
        else:
            superstar_grids = None

        # read weighted_superstar grids - same presence check as above
        has_weighted = any(f.startswith("weighted") for f in files)
        if has_weighted and self.read_weighted:
            weighted_grids = {p: Grid.from_file(f"weighted_{p}.{grid_extension}")
                              for p in hotspot_grids}
        else:
            weighted_grids = None

        # fetch buriedness grid
        try:
            buriedness_name = [f for f in files if f.startswith("buriedness")][0]
        except IndexError:
            buriedness_name = None

        if buriedness_name and self.read_buriedness:
            buriedness = Grid.from_file(buriedness_name)
        else:
            buriedness = None

    return Results(super_grids=super_grids,
                   protein=prot,
                   buriedness=buriedness,
                   superstar=superstar_grids,
                   weighted_superstar=weighted_grids,
                   identifier=basename(path))
def make_max_difference_maps(io):
    """
    Write one difference map (e1 minus e2) per probe type.

    For each of the donor, acceptor and apolar probes, the grid
    ``<e>_<probe>_max.ccp4`` is read from ``io.ensemble_dirs[e]`` for both
    ensembles, the second is subtracted from the first, and the result is
    written to ``diff_<e1>_<e2>_<probe>.ccp4`` under
    ``io.params.pipeline_root``.

    NOTE(review): ``e1`` and ``e2`` are not parameters of this function; they
    are resolved as names from the enclosing scope when it runs. Confirm they
    are defined there before this is called.

    :param io: object providing ``ensemble_dirs`` and ``params.pipeline_root``
    :return: None
    """
    probes = ["donor", "acceptor", "apolar"]

    for probe in probes:
        g1 = Grid.from_file(
            join(io.ensemble_dirs[e1], "{}_{}_max.ccp4".format(e1, probe)))
        g2 = Grid.from_file(
            join(io.ensemble_dirs[e2], "{}_{}_max.ccp4".format(e2, probe)))
        diff_g = g1 - g2

        # Format only the file name: formatting the joined path would also
        # interpret any braces contained in pipeline_root.
        out_name = "diff_{}_{}_{}.ccp4".format(e1, e2, probe)
        diff_g.write(join(io.params.pipeline_root, out_name))
def make_thresholded_maps(io, e1, e2):
    """
    Split each probe's difference map by the value of an ensemble difference array.

    For every probe the pickled ensemble maps of ``e1`` and ``e2`` are loaded,
    ``get_difference_map`` is called on the first with the second, and the
    previously written ``diff_<e1>_<e2>_<probe>.ccp4`` grid is masked twice:
    once where that array is above 3 and once where it is below 3. Both
    masked grids are written to ``thresholded_hotspot_maps`` under
    ``io.params.pipeline_root`` (created if missing).

    NOTE(review): points where the array equals exactly 3 end up in neither
    output - confirm this is intended.

    :param io: EnsembleIO instance.
    :param str e1: name of reference ensemble
    :param str e2: name of off-target ensemble
    :return: None
    """
    t_dir = join(io.params.pipeline_root, "thresholded_hotspot_maps")
    if not exists(t_dir):
        os.mkdir(t_dir)
    print(t_dir)

    probes = ["donor", "acceptor", "apolar"]

    for probe in probes:
        # Pickle data is binary: open with "rb" and close the handles promptly.
        # NOTE: pickle.load executes arbitrary code from the file; these paths
        # are assumed to be the pipeline's own outputs.
        with open(io.ensemble_maps[e1][probe], "rb") as handle:
            ge1 = pickle.load(handle)
        with open(io.ensemble_maps[e2][probe], "rb") as handle:
            ge2 = pickle.load(handle)

        iarr = ge1.get_difference_map(ge2, tolerance=0)

        # Format only the file name so braces in pipeline_root are left alone.
        diff_name = "diff_{}_{}_{}.ccp4".format(e1, e2, probe)
        diff = Grid.from_file(join(io.params.pipeline_root, diff_name)).get_array()

        over3 = (iarr > 3) * diff
        gover3 = ge1.save_grid(over3)
        gover3.write(
            join(t_dir, "diff_{}_{}_{}_over3.ccp4".format(e1, e2, probe)))

        under3 = (iarr < 3) * diff
        gunder3 = ge1.save_grid(under3)
        gunder3.write(
            join(t_dir, "diff_{}_{}_{}_under3.ccp4".format(e1, e2, probe)))
def test_write_real_single(self):
    """Write a Results object assembled from the 1hcl test grids, SuperStar maps included."""
    data_dir = "testdata/1hcl"
    probe_names = ["donor", "acceptor", "apolar"]

    def load_grid(stem):
        # Every grid of this data set sits in data_dir with a .grd extension.
        return Grid.from_file(os.path.join(data_dir, f"{stem}.grd"))

    hotspot_maps = {}
    for name in probe_names:
        hotspot_maps[name] = load_grid(name)

    superstar_maps = {}
    for name in probe_names:
        superstar_maps[name] = load_grid(f"superstar_{name}")

    buriedness_map = load_grid("buriedness")
    protein = Protein.from_file(os.path.join(data_dir, "protein.pdb"))

    result = Results(super_grids=hotspot_maps,
                     protein=protein,
                     buriedness=buriedness_map,
                     superstar=superstar_maps)

    writer_settings = HotspotWriter.Settings()
    writer_settings.output_superstar = True

    with HotspotWriter("testdata/hs_io/minimal_all_grids_real", settings=writer_settings) as w:
        w.write(result)
def _get_grids(self, sub_dir=None):
    """
    Create the grid dictionary and load the buriedness grid.

    :param sub_dir: optional sub-directory of the base directory; when given,
        the instance's file listing and extension set are refreshed from it
        and the interaction grids are read from there
    :return: tuple ``(grid_dic, buriedness)``; ``grid_dic`` maps a file stem
        to its grid, ``buriedness`` is None when loading it raised RuntimeError
    :raises RuntimeError: outside the ``.dat`` case, unless exactly one
        supported grid extension is present
    """
    base = self._base
    if sub_dir:
        base = join(self._base, sub_dir)
        self._files = listdir(base)
        # NOTE(review): the original filter here (`f != '' or f != '.py'`) was
        # always true, so every entry contributes its extension. Confirm
        # whether '' and '.py' were meant to be excluded.
        self._extensions = {splitext(entry)[1] for entry in self._files}

    if ".dat" in self._extensions:
        # NOTE(review): this branch is selected by ".dat" but picks ".grd"
        # files - confirm which extension is intended.
        wanted = [
            entry for entry in self._files
            if splitext(entry)[1] == ".grd"
            and splitext(entry)[0] in self._supported_interactions
        ]
        grid_dic = {
            splitext(fname)[0]: Grid.from_array(join(base, fname))
            for fname in wanted
        }
        try:
            # NOTE(review): reads `self.base`, whereas the rest of the method
            # uses `self._base` / `base` - confirm the attribute exists.
            buriedness = Grid.from_array(join(self.base, "buriedness.dat"))
        except RuntimeError:
            buriedness = None
        return grid_dic, buriedness

    matches = list(set(self._extensions).intersection(self._supported_grids))
    if len(matches) != 1:
        raise RuntimeError("Opps, something went wrong.")
    grid_ext = matches[0]

    wanted = [
        entry for entry in self._files
        if splitext(entry)[1] == grid_ext
        and splitext(entry)[0] in self._supported_interactions
    ]
    grid_dic = {
        splitext(fname)[0]: Grid.from_file(join(base, fname))
        for fname in wanted
    }
    try:
        # NOTE(review): the buriedness grid is looked up relative to the
        # current working directory, not `base` - confirm this is intended.
        buriedness = Grid.from_file("buriedness{}".format(grid_ext))
    except RuntimeError:
        buriedness = None

    return grid_dic, buriedness
def run_hotspot_calculation(self, method="ghecom"):
    """
    Runs the hotspots calculation on the specified PDB structure.

    When ``superstar_grids.zip`` exists next to the output directory, it is
    unpacked into a temporary folder, the calculation is started from those
    grids and the folder is removed afterwards. Otherwise the calculation is
    run from the protein and the SuperStar grids are saved via
    ``_save_superstar_grids``. The result is then written, zipped, to the
    output directory.

    :param method: passed as ``buriedness_method``; only used when no saved
        SuperStar grids are available
    :return: path of ``out.zip`` inside the output directory
    """
    runner = Runner()

    run_settings = runner.Settings()
    run_settings.nrotations = self.number_rotations
    run_settings.apolar_translation_threshold = 15
    run_settings.polar_translation_threshold = 15
    run_settings.sphere_maps = self.spheres

    # Check if SuperStar and Ghecom have already been run.
    archive = Path(self.out_dir.parent, "superstar_grids.zip")
    if not archive.exists():
        result = runner.from_protein(protein=self.prepare_protein(),
                                     charged_probes=self.charged,
                                     probe_size=7,
                                     buriedness_method=method,
                                     cavities=None,
                                     nprocesses=1,
                                     settings=run_settings)
        # Save and zip the SuperStar Grids:
        self._save_superstar_grids(runner)
    else:
        unpack_dir = Path(self.out_dir.parent, archive.stem)
        if not unpack_dir.exists():
            unpack_dir.mkdir()
        unpack_archive(archive, unpack_dir, 'zip')

        buriedness_file = Path(unpack_dir, 'buriedness.ccp4').resolve()
        buriedness_grid = Grid.from_file(str(buriedness_file))

        protein = self.prepare_protein()
        atomic_hotspots = self.create_atomic_hotspots(unpack_dir)
        result = runner.from_superstar(protein=protein,
                                       superstar_grids=atomic_hotspots,
                                       buriedness=buriedness_grid,
                                       charged_probes=self.charged,
                                       settings=run_settings,
                                       clear_tmp=True)
        rmtree(unpack_dir)

    # Save and zip the Results
    out_dir_str = str(self.out_dir.resolve())
    with hs_io.HotspotWriter(out_dir_str,
                             visualisation="pymol",
                             grid_extension=".ccp4",
                             zip_results=True) as writer:
        writer.write(result)

    out_zip = Path(self.out_dir, 'out.zip')
    print(f"out_file: {str(out_zip.resolve())}")
    return out_zip
def test_edge_detection(self):
    """
    Check that edge detection returns fewer items than ``count_grid`` reports,
    then write a PyMOL script that draws one sphere per returned item.
    """
    self.selected = Grid.from_file("testdata/result/molA.grd")
    edge_items = self.selected.edge_detection()

    self.assertLess(len(edge_items), self.selected.count_grid())

    pymol_file = PyMOLFile()
    for idx, item in enumerate(edge_items):
        sphere_name = f"sphere_{idx}"
        pymol_file.commands += PyMOLCommands.sphere(sphere_name, (0, 0, 1, 1), item, 0.1)
        pymol_file.commands += PyMOLCommands.load_cgo(sphere_name, f"{sphere_name}_obj")

    pymol_file.write("testdata/grid_extension/edge_detection.py")
def test_write_pymol_isoslider(self):
    """
    Build a Results object by hand from an unzipped hotspot archive and write
    the PyMOL isosurface and isoslider commands for it to a script.
    """
    # read in manually
    archive = "testdata/hs_io/minimal_all_grids/out.zip"
    extract_root = tempfile.mkdtemp()
    with zipfile.ZipFile(archive) as hs_zip:
        hs_zip.extractall(extract_root)

    hotspot_dir = os.path.join(extract_root, "hotspot")
    probe_names = ["donor", "acceptor", "apolar"]

    hotspot_maps = {}
    for name in probe_names:
        hotspot_maps[name] = Grid.from_file(os.path.join(hotspot_dir, f"{name}.grd"))

    superstar_maps = {}
    for name in probe_names:
        superstar_maps[name] = Grid.from_file(os.path.join(hotspot_dir, f"superstar_{name}.grd"))

    protein = Protein.from_file(os.path.join(hotspot_dir, "protein.pdb"))

    hr = Results(super_grids=hotspot_maps,
                 protein=protein,
                 superstar=superstar_maps)
    hr.identifier = "hotspot"

    writer_settings = HotspotWriter.Settings()
    writer_settings.output_superstar = True

    # we won't actually write
    writer = HotspotWriter("testdata/hs_io/minimal_all_grids", settings=writer_settings)

    fhm_commands = writer._write_pymol_isosurfaces(hr.super_grids, "hotspot", "hotspot", "fhm")
    writer.pymol_out.commands += fhm_commands

    superstar_commands = writer._write_pymol_isosurfaces(hr.superstar, "hotspot", "hotspot", "superstar")
    writer.pymol_out.commands += superstar_commands

    writer._write_pymol_isoslider(hr)

    writer.pymol_out.write("testdata/hs_io/minimal_all_grids/test_write_pymol_isoslider.py")
def plot_distribution_histograms(io, mode):
    """
    Collect one grid per probe for every ensemble and pass them on to
    ``get_grid_dic_histograms``.

    For each ensemble in ``io.ensembles`` and each of the acceptor, apolar and
    donor probes, the first ``*.ccp4`` path in the ensemble directory whose
    path contains both the probe name and ``mode`` is loaded.

    :param io: object providing ``ensembles`` and ``ensemble_dirs``
    :param mode: substring a grid path must contain; also passed on as ``suffix``
    :raises IndexError: if no path matches a probe/mode combination
    """
    probe_names = ('acceptor', 'apolar', 'donor')

    for ensemble in io.ensembles:
        grids_by_probe = {}

        for probe in probe_names:
            candidates = glob(join(io.ensemble_dirs[ensemble], '*.ccp4'))
            print(candidates)

            matching = [c for c in candidates if probe in c and mode in c]
            chosen = matching[0]
            print(chosen)

            grids_by_probe[probe] = Grid.from_file(chosen)

        get_grid_dic_histograms(grids_by_probe,
                                out_dir=io.ensemble_dirs[ensemble],
                                prot_name=ensemble,
                                suffix=mode)
def _get_hotspot(self, cav_id):
    """
    Calculate a hotspot map from pre-calculated superstar and buriedness grids.

    The elapsed time of the calculation is written to
    ``self.hotspot_time[cav_id]`` and the zipped result to
    ``self.hotspot[cav_id]`` (created if missing).

    :param cav_id: key used to look up this cavity's superstar, hotspot and
        timing paths on the instance
    :return: None
    """
    # inputs
    protein = Protein.from_file(self.apo_prep)

    superstar_zip = os.path.join(self.superstar[cav_id], "out.zip")
    superstar_result = HotspotReader(path=superstar_zip).read()

    atomic_hotspots = []
    for ident, grid in superstar_result.super_grids.items():
        atomic_hotspots.append(
            _AtomicHotspotResult(identifier=ident, grid=grid, buriedness=None))

    buriedness_grid = Grid.from_file(self.buriedness)

    # tasks (timed, including the Runner set-up)
    started = time.time()

    runner = Runner()
    run_settings = runner.Settings()
    run_settings.apolar_translation_threshold = 14
    run_settings.polar_translation_threshold = 14
    run_settings.polar_contributions = False
    run_settings.sphere_maps = False
    run_settings.nrotations = 3000

    hotspot_result = runner.from_superstar(protein,
                                           atomic_hotspots,
                                           buriedness_grid,
                                           settings=run_settings,
                                           clear_tmp=True)
    finished = time.time()

    # output
    out_dir = self.hotspot[cav_id]
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    with open(self.hotspot_time[cav_id], 'w') as timing_file:
        timing_file.write(str(finished - started))

    with HotspotWriter(self.hotspot[cav_id], zip_results=True) as writer:
        writer.write(hotspot_result)
def get_ensemble_array(self): """ Reads in grids, converts them to 3d numpy arrays, and stacks them into 4d numpy array, which holds the information for the ensemble. :return: """ # Initialise the array ensemble_array = None # Needed for converting between Cartesian coordinates and indices. rec_spacing = 1.0 / self.spacing # Fill in ensemble array; i counts the number of grids that have been added. i = 0 for p in self.paths: # Load in grid g = Grid.from_file(p) # Check the spacing of the grid. If different, continue and add to log. if g.spacing != self.spacing: print("Grid at {} has wrong spacing (found {}, expected {})". format(p, g.spacing, self.spacing)) continue # Counter i keeps track of how many grids we have added i += 1 # Get the dimensions of the grid: curr_dims = np.array(g.bounding_box) # Convert to numpy array arr = g.get_array() # Create the ensemble if i == 1: # Store the dimensions of the ensemble ens_dims = curr_dims # Put in as first element of the ensemble array ensemble_array = arr elif i == 2: origin_diff = (curr_dims[0] - ens_dims[0]) * rec_spacing far_diff = ((curr_dims[1] - ens_dims[1])) * rec_spacing # Padding both arrays to make them the same size (and so stackable): arr = self.pad_array(arr, origin_diff, far_diff) ensemble_array = self.pad_array(ensemble_array, -origin_diff, -far_diff) # Stacking 2 3D arrays creates a 4D array. ensemble_array = np.stack((ensemble_array, arr), axis=-1) # Update the ensemble dimensions ens_dims[0] = np.minimum(ens_dims[0], curr_dims[0]) ens_dims[1] = np.maximum(ens_dims[1], curr_dims[1]) else: origin_diff = (curr_dims[0] - ens_dims[0]) * rec_spacing far_diff = ((curr_dims[1] - ens_dims[1])) * rec_spacing # Padding both arrays to make them the same size: arr = self.pad_array(arr, origin_diff, far_diff) # Ensemble array is now 4D. 
ensemble_array = np.pad( ensemble_array, ((self.relu(-origin_diff[0]), self.relu(far_diff[0])), (self.relu(-origin_diff[1]), self.relu(far_diff[1])), (self.relu(-origin_diff[2]), self.relu(far_diff[2])), (0, 0)), "constant", constant_values=0) # Np.stack stacks along a new axis, but ensemble_array is already 4D, so use np.append instead. # When using np.append, arrays have to be the same dimension, so we expand arr with an empty 4th dimension. arr = np.expand_dims(arr, axis=3) print(arr.shape, ensemble_array.shape) ensemble_array = np.append(ensemble_array, arr, axis=3) # Update the ensemble dimensions ens_dims[0] = np.minimum(ens_dims[0], curr_dims[0]) ens_dims[1] = np.maximum(ens_dims[1], curr_dims[1]) self.dimensions = ens_dims self.ensemble_map_dict[i - 1] = p self.ensemble_array = ensemble_array
def setUp(self):
    """Load the buriedness test grid and create the grid returned by ``random_grid(1)``."""
    # A buriedness grid
    buriedness_path = "testdata/result/buriedness.grd"
    self.buriedness = Grid.from_file(buriedness_path)

    single_peak_grid = random_grid(1)
    self.single_peak = single_peak_grid