def example_datasets(
    draw,
    min_size: int = 60,
    max_size: int = 130,
    directory: Path = Path("."),
) -> MultiCrystalDataset:
    """Draw a MultiCrystalDataset built from a subset of the datasets in *directory*.

    The dataset tags are the keys of ``parse_pandda_input(directory)``; each
    value is indexed with ``"mtz_path"`` and ``"pdb_path"``.

    Args:
        draw: Callable invoked once with a hypothesis ``lists`` strategy; its
            return value is iterated as the chosen dataset tags.
            NOTE(review): presumably supplied by
            ``hypothesis.strategies.composite`` -- the decorator is not
            visible in this chunk.
        min_size: Passed to ``lists`` as the minimum number of tags to draw.
        max_size: Passed to ``lists`` as the maximum number of tags to draw.
        directory: Directory handed to ``parse_pandda_input``.

    Returns:
        A ``MultiCrystalDataset`` wrapping a ``{dtag: Dataset}`` dict.
    """
    paths_by_dtag = parse_pandda_input(directory)

    # Tags are drawn without repetition (unique=True).
    dtag_strategy = hypothesis.strategies.sampled_from(
        list(paths_by_dtag.keys()))
    chosen_dtags = draw(
        hypothesis.strategies.lists(
            dtag_strategy,
            unique=True,
            min_size=min_size,
            max_size=max_size,
        ))

    datasets = {}
    for dtag in chosen_dtags:
        entry = paths_by_dtag[dtag]
        reflections = reflections_from_mtz(MTZFile(entry["mtz_path"]))
        structure = structure_biopandas_from_pdb(PDBFile(entry["pdb_path"]))
        datasets[dtag] = Dataset(reflections, structure)

    return MultiCrystalDataset(datasets)
def normalise_structure(reference_pdb_path, dtag, output_path):
    """Translate the reference structure by its negated minimum corner and save it.

    The per-axis minimum of all atom coordinates is computed, printed, negated
    and passed (with an identity 3x3 matrix) to ``translate_structure``.  The
    per-axis minimum of the translated structure is then printed as well, and
    the translated structure is written to
    ``output_path / "<dtag>_normalised.pdb"``.

    Args:
        reference_pdb_path: Path wrapped in ``PDBFile`` and loaded with
            ``structure_biopython_from_pdb``.
        dtag: Tag used to build the output file name.
        output_path: Directory-like object supporting ``/`` with a string.
    """
    reference = structure_biopython_from_pdb(PDBFile(reference_pdb_path))

    # Stack every atom coordinate into one array and take the per-axis minimum.
    coords = np.vstack(
        [atom.coord for atom in reference.structure.get_atoms()])
    box_origin = np.min(coords, axis=0)
    print("\tBox origin is: {}".format(box_origin))

    shifted = translate_structure(reference, np.eye(3), -box_origin)

    # Report the minimum corner again, this time for the translated structure.
    shifted_coords = np.vstack(
        [atom.coord for atom in shifted.structure.get_atoms()])
    box_origin = np.min(shifted_coords, axis=0)
    print(box_origin)

    target = output_path / "{}_normalised.pdb".format(dtag)
    shifted.output(target)
def align_map_to_reference(
    dtag,
    reference_dtag,
    dataset_path,
    reference_pdb_path,
    output_path,
    min_res,
    structure_factors="FWT,PHWT",
):
    """Align a dataset to the reference structure and write resampled maps.

    Steps, in order:

    1. Load the reference and moving structures and the dataset's xmap.
    2. Take the per-axis maximum of the reference atom coordinates and print it.
    3. Align via ``mdc3.functions.alignment.align`` and apply the
       alignment to the moving structure in place.
    4. Interpolate the xmap onto a uniform grid whose size per axis is
       ``int(max_coord) + 5``.
    5. Copy the interpolated data into a second NXmap built with an identity
       rotation and zero translation.
    6. Write ``<dtag>_origin.ccp4``, ``<dtag>.ccp4`` and ``<dtag>_aligned.pdb``
       under ``output_path``.

    Args:
        dtag: Tag used to build the output file names.
        reference_dtag: Not used in the body.
        dataset_path: Mapping indexed with ``"pdb_path"`` and ``"mtz_path"``.
        reference_pdb_path: Path of the reference PDB file.
        output_path: Directory-like object supporting ``/`` with a string.
        min_res: Not used in the body.
        structure_factors: Passed through to ``xmap_from_path``.
    """
    # Structures: reference first, then the one being moved.
    reference_structure = structure_biopython_from_pdb(
        PDBFile(reference_pdb_path))
    moving_structure = structure_biopython_from_pdb(
        PDBFile(dataset_path["pdb_path"]))

    # Crystallographic map for the moving dataset.
    xmap = mdc3.types.real_space.xmap_from_path(
        dataset_path["mtz_path"],
        structure_factors,
    )

    # Per-axis maximum of the reference atom coordinates.
    reference_coords = np.vstack(
        [atom.coord for atom in reference_structure.structure.get_atoms()])
    box_limits = np.max(reference_coords, axis=0)
    print(box_limits)

    align = mdc3.functions.alignment.align
    alignment_moving_to_ref = align(
        reference_structure.structure,
        moving_structure.structure,
    )
    # NOTE(review): the result of this second alignment is never read below;
    # the call is kept because `align` is opaque from here.
    alignment_ref_to_moving = align(
        moving_structure.structure,
        reference_structure.structure,
    )

    rotran = alignment_moving_to_ref.rotran
    rotation = rotran[0]
    translation = rotran[1]

    # Mutates the moving structure; it is written out at the end.
    alignment_moving_to_ref.apply(moving_structure.structure)

    print("translation: orthogonal to grid")
    print(translation)
    print("rotation")
    print(rotation)

    # Grid size: truncated box limit plus 5 along each axis.
    grid_params = [int(limit) + 5 for limit in box_limits]
    size_x = grid_params[0]
    size_y = grid_params[1]
    size_z = grid_params[2]

    nxmap = mdc3.types.real_space.interpolate_uniform_grid(
        xmap,
        translation,
        np.transpose(rotation),
        grid_params=grid_params,
    )
    nxmap_data = nxmap.export_numpy()

    # Same data, but in an NXmap built with identity rotation / zero shift.
    identity_rtop = clipper_python.RTop_orth(
        clipper_python.Mat33_double(np.eye(3)),
        clipper_python.Vec3_double(0, 0, 0),
    )
    origin_nxmap = clipper_python.NXmap_float(
        clipper_python.Grid(size_x, size_y, size_z),
        identity_rtop,
    )
    origin_nxmap.import_numpy(
        clipper_python.Coord_grid(0, 0, 0),
        nxmap_data,
    )

    # Cell with edge lengths equal to the grid size and all angles pi/2,
    # used for both outputs (the xmap's own cell is not used).
    right_angle = np.pi / 2
    cell = clipper_python.Cell(
        clipper_python.Cell_descr(
            size_x,
            size_y,
            size_z,
            right_angle,
            right_angle,
            right_angle,
        ))

    write_nxmap = mdc3.types.real_space.output_nxmap
    write_nxmap(
        origin_nxmap,
        output_path / "{}_origin.ccp4".format(dtag),
        cell,
    )
    write_nxmap(
        nxmap,
        output_path / "{}.ccp4".format(dtag),
        cell,
    )

    # Aligned coordinates of the moving structure.
    moving_structure.output(output_path / "{}_aligned.pdb".format(dtag))
def mtz_to_ccp4(
    self,
    dtag,
    reference_pdb_path,
    dataset_path,
    output_path,
    min_res,
    structure_factors="FWT,PHWT",
):
    """Compute a map from a dataset's MTZ file and write it as ``<dtag>.ccp4``.

    The MTZ file is read with gemmi, reflections whose d-spacing (from
    ``make_d_array()``) is below 3.0 are dropped, the remaining amplitudes and
    phases are transformed to a map with ``sample_rate=3``, and the map is
    written to ``output_path / "<dtag>.ccp4"``.

    Args:
        dtag: Tag used to build the output file name.
        reference_pdb_path: Unused; retained for interface compatibility.
            (It was previously loaded only to compute box limits that
            nothing consumed.)
        dataset_path: Mapping indexed with ``"mtz_path"``.
        output_path: Directory-like object supporting ``/`` with a string.
        min_res: Unused. NOTE(review): the resolution cutoff is hard-coded to
            3.0 below -- confirm whether ``min_res`` was meant to drive it.
        structure_factors: Comma-separated ``"<amplitude>,<phase>"`` MTZ
            column labels. Previously ignored in favour of hard-coded
            ``"FWT"``/``"PHWT"``; the default reproduces that behaviour.

    Raises:
        ValueError: If ``structure_factors`` is not exactly two non-empty,
            comma-separated labels.
    """
    labels = [label.strip() for label in structure_factors.split(",")]
    if len(labels) != 2 or not all(labels):
        raise ValueError(
            "structure_factors must be '<amplitude>,<phase>', got {!r}".format(
                structure_factors))
    amplitude_label, phase_label = labels

    mtz = gemmi.read_mtz_file(str(dataset_path["mtz_path"]))

    # Keep only the rows whose d-spacing is at least 3.0.
    all_data = np.array(mtz, copy=False)
    mtz.set_data(all_data[mtz.make_d_array() >= 3.0])

    grid = mtz.transform_f_phi_to_map(
        amplitude_label,
        phase_label,
        sample_rate=3,
    )

    ccp4_map = gemmi.Ccp4Map()
    ccp4_map.grid = grid
    # Arguments as in the gemmi map-writing example: mode 2 (float32 data)
    # and True to recompute the header statistics -- see the gemmi docs.
    ccp4_map.update_ccp4_header(2, True)
    ccp4_map.write_ccp4_map(str(output_path / "{}.ccp4".format(dtag)))
from hypothesis import given, settings from hypothesis.strategies import just from pathlib import Path from mdc3.types.files import (PDBFile, MTZFile, ) from mdc3.types.structures import structure_biopandas_from_pdb from mdc3.types.reflections import (Reflections, reflections_from_mtz, new_reflections_from_reflections_at_res, ) @given(just(PDBFile(Path("../../data/dimple.pdb").resolve()))) def test_structure_biopandas_from_pdb(pdb_file: PDBFile) -> None: structure_biopandas_from_pdb(pdb_file) @given(just(MTZFile(Path("../../data/dimple.mtz").resolve()))) def test_reflections_from_mtz(mtz_file: MTZFile) -> None: reflections_from_mtz(mtz_file) @settings(deadline=1000) @given(just(reflections_from_mtz(MTZFile(Path("../../data/dimple.mtz").resolve()))), just(2), ) def test_new_reflections_from_reflections_at_res(reflections: Reflections, resolution: float,