def refine(sample_filename, rec_filename):
    """
    Rigid-body fit the sample's single molecule into a reconstructed map

    The sample is opened with parakeet.sample.load, the name of its only
    molecule is used to look up a PDB file via parakeet.data.get_pdb, and
    that model is passed to maptools.fit together with the reconstruction.

    Args:
        sample_filename: Path of the sample file to load
        rec_filename: Path of the reconstruction handed to maptools.fit

    maptools.fit is asked to use "refined.pdb" as its output model and
    "fit.log" as its log file; both names are fixed.

    """

    # Open the sample; exactly one molecule is expected in it
    sample = parakeet.sample.load(sample_filename)
    assert sample.number_of_molecules == 1

    # Only the name of the first (and only) molecule is needed
    molecules = list(sample.iter_molecules())
    molecule_name, _molecule_data = molecules[0]

    # Look up the atomic model belonging to that molecule
    model_filename = parakeet.data.get_pdb(molecule_name)

    # Settings of the rigid body fit
    fit_options = {
        "output_pdb_filename": "refined.pdb",
        "resolution": 3,
        "ncycle": 10,
        "mode": "rigid_body",
        "log_filename": "fit.log",
    }

    # Fit the model to the map
    maptools.fit(rec_filename, model_filename, **fit_options)
def test_Sample(tmp_path, atom_data_4v5d):
    """
    Exercise the parakeet.sample.Sample interface with a single molecule

    A new sample file is created in tmp_path, one copy of the 4v5d atom data
    is added at (200, 200, 200), and the bookkeeping properties, range
    queries, atom deletion and atom insertion are checked in turn.

    """
    placement = (200, 200, 200)
    box_lower = (0, 0, 0)
    box_upper = (400, 400, 400)
    query_lower = (100, 100, 100)
    query_upper = (300, 300, 300)
    columns = ["x", "y", "z"]

    # Create an empty sample file
    filename = os.path.join(tmp_path, "test_Sample.h5")
    sample = parakeet.sample.Sample(filename, mode="w")

    # Dataset naming and the first dataset range
    assert sample.atoms_dataset_name((1, 2, 3)) == "X=000001; Y=000002; Z=000003"
    ranges = list(sample.atoms_dataset_range((1, 2, 3), (3, 4, 5)))
    first_range = ranges[0]
    assert (first_range[0] == (0, 0, 0)).all()
    assert (first_range[1] == (sample.step, sample.step, sample.step)).all()

    # Expected extent of the molecule once shifted to its placement
    coordinates = atom_data_4v5d.data[columns]
    expected_min = coordinates.min() + 200
    expected_max = coordinates.max() + 200

    # Add a single molecule and describe the sample geometry
    sample.add_molecule(
        atom_data_4v5d,
        positions=[placement],
        orientations=[(0, 0, 0)],
        name="4v5d",
    )
    sample.containing_box = box_lower, box_upper
    sample.centre = placement
    sample.shape = {"type": "cube", "cube": {"length": 400}}

    # Check the bookkeeping properties
    assert (sample.bounding_box == (expected_min, expected_max)).all()
    assert (sample.containing_box == (box_lower, box_upper)).all()
    assert sample.molecules == ["4v5d"]
    assert sample.number_of_molecular_models == 1
    assert sample.number_of_molecules == 1
    assert (sample.dimensions == (expected_max - expected_min)).all()

    # Molecule access, both directly and through the iterator
    _atoms, _positions, _orientations = sample.get_molecule("4v5d")
    for molecule_name, _molecule in sample.iter_molecules():
        assert molecule_name == "4v5d"
    assert sample.number_of_atoms == atom_data_4v5d.data.shape[0]

    # The atom iterator only has to run to completion
    for _block in sample.iter_atoms():
        pass

    # Atoms returned by a range query must lie within that range
    selected = sample.get_atoms_in_range(query_lower, query_upper).data
    assert selected.shape[0] > 0
    xyz = selected[columns].to_numpy()
    assert ((xyz >= query_lower) & (xyz < query_upper)).all()

    # Deleting with the same atom data at the same position empties the box
    deleter = parakeet.sample.AtomDeleter(atom_data_4v5d, position=placement)
    sample.del_atoms(deleter)
    remaining = sample.get_atoms_in_range(box_lower, box_upper).data
    assert remaining.shape[0] == 0

    # Adding plain atoms, printing the info and closing must not raise
    sample.add_atoms(atom_data_4v5d)
    sample.info()
    sample.close()
def average_particles(
    scan,
    sample_filename,
    rec_filename,
    half_1_filename,
    half_2_filename,
    particle_size=0,
):
    """
    Average particles to compute averaged reconstruction

    A cubic sub volume is cut out of the reconstruction around every particle
    of the sample's single molecule, resampled after rotating it by the
    (reordered and negated) particle orientation, and added to one of two
    half averages chosen at random. Both half averages are then divided by
    their particle counts and written as MRC files carrying the voxel size
    of the reconstruction.

    Args:
        scan: Not used by the current implementation; kept for interface
            compatibility
        sample_filename: Path of the sample file, read with
            parakeet.sample.load
        rec_filename: Path of the reconstruction, opened with mrcfile.mmap
        half_1_filename: Output path of the first half average (overwritten)
        half_2_filename: Output path of the second half average (overwritten)
        particle_size: Edge length of the extraction box. The box actually
            used has edge 2 * (particle_size // 2). If 0, the half edge is
            derived from the extent of the atom coordinates so that any
            orientation of the particle fits within the box

    Particles whose box does not lie completely inside the reconstruction
    are skipped silently. The assignment to a half set uses the global state
    of the random module, so it is only reproducible if the caller seeds it.

    """

    def rotate_array(data, rotation, offset):
        """
        Resample data on its own grid after rotating the grid about offset

        rotation is a rotation vector; the result has the shape of data

        """
        # Create the pixel indices
        az = numpy.arange(data.shape[0])
        ay = numpy.arange(data.shape[1])
        ax = numpy.arange(data.shape[2])
        x, y, z = numpy.meshgrid(az, ay, ax, indexing="ij")

        # Create a stack of coordinates relative to the rotation centre
        xyz = numpy.vstack(
            [
                x.reshape(-1) - offset[0],
                y.reshape(-1) - offset[1],
                z.reshape(-1) - offset[2],
            ]
        ).T

        # Rotate the coordinates
        r = scipy.spatial.transform.Rotation.from_rotvec(rotation)
        transformed_xyz = r.apply(xyz)

        # Shift back and restore the grid shape
        x = (transformed_xyz[:, 0] + offset[0]).reshape(data.shape)
        y = (transformed_xyz[:, 1] + offset[1]).reshape(data.shape)
        z = (transformed_xyz[:, 2] + offset[2]).reshape(data.shape)

        # Sample the data at the rotated coordinates (linear interpolation)
        return scipy.ndimage.map_coordinates(data, [x, y, z], order=1)

    def save_volume(filename, volume, voxel_size):
        """
        Write volume to a new MRC file and close it again

        """
        # The context manager makes sure the file is flushed and closed
        with mrcfile.new(filename, overwrite=True) as handle:
            handle.set_data(volume)
            handle.voxel_size = voxel_size

    # Load the sample
    sample = parakeet.sample.load(sample_filename)

    # Get the sample centre
    centre = numpy.array(sample.centre)

    # Read the reconstruction file. It is kept open until the averages have
    # been written because the tomogram is a memory map into that file
    with mrcfile.mmap(rec_filename) as tomo_file:
        tomogram = tomo_file.data

        # Get the size of the volume
        voxel_size = numpy.array(
            (
                tomo_file.voxel_size["x"],
                tomo_file.voxel_size["y"],
                tomo_file.voxel_size["z"],
            )
        )
        size = numpy.array(tomogram.shape)[[2, 0, 1]] * voxel_size

        # Loop through the molecules (exactly one is expected)
        assert sample.number_of_molecules == 1
        for name, (atoms, positions, orientations) in sample.iter_molecules():
            # Compute the box size based on the size of the particle so that
            # any orientation should fit within the box
            xmin = atoms.data["x"].min()
            xmax = atoms.data["x"].max()
            ymin = atoms.data["y"].min()
            ymax = atoms.data["y"].max()
            zmin = atoms.data["z"].min()
            zmax = atoms.data["z"].max()
            xc = (xmax + xmin) / 2.0
            yc = (ymax + ymin) / 2.0
            zc = (zmax + zmin) / 2.0
            if particle_size == 0:
                half_length = (
                    int(
                        ceil(
                            sqrt(
                                (xmin - xc) ** 2
                                + (ymin - yc) ** 2
                                + (zmin - zc) ** 2
                            )
                        )
                    )
                    + 1
                )
            else:
                half_length = particle_size // 2
            length = 2 * half_length
            assert len(positions) == len(orientations)
            num_particles = len(positions)
            print(
                "Averaging %d %s particles with box size %d"
                % (num_particles, name, length)
            )

            # Create the average arrays
            half_1 = numpy.zeros(shape=(length, length, length), dtype="float32")
            half_2 = numpy.zeros(shape=(length, length, length), dtype="float32")
            num_1 = 0
            num_2 = 0

            # Sort the positions and orientations by y. The sorted pairs are
            # iterated directly, which also works for zero particles
            particles = sorted(
                zip(positions, orientations), key=lambda pair: pair[0][1]
            )

            # Loop through all the particles
            for i, (position, orientation) in enumerate(particles):
                # Compute p within the volume
                # NOTE(review): p is expressed in the units of size (voxel
                # count times voxel size) but is used as a voxel index below;
                # this presumably assumes a unit voxel size - confirm
                p = position - (centre - size / 2.0)
                p[2] = size[2] - p[2]
                print(
                    "Particle %d: position = %s, orientation = %s"
                    % (
                        i,
                        "[ %.1f, %.1f, %.1f ]" % tuple(p),
                        "[ %.1f, %.1f, %.1f ]" % tuple(orientation),
                    )
                )

                # Set the region to extract
                p_floor = numpy.floor(p).astype("int32")
                x0 = p_floor - half_length
                x1 = p_floor + half_length
                offset = p - p_floor

                # Get the sub tomogram
                print("Getting sub tomogram")

                # A negative start index would be interpreted relative to the
                # end of the axis and could select a correctly sized box from
                # the wrong side of the tomogram, so skip such particles
                if (x0 < 0).any():
                    continue
                sub_tomo = tomogram[x0[1] : x1[1], x0[2] : x1[2], x0[0] : x1[0]]

                # A box that extends past the upper edge gets truncated by
                # the slicing; skip it as well
                if sub_tomo.shape != half_1.shape:
                    continue

                # Reorder input vectors to the axis order of the tomogram
                offset = numpy.array(sub_tomo.shape)[::-1] / 2 + offset[[1, 2, 0]]
                rotation = -numpy.array(orientation)[[1, 2, 0]]
                rotation[1] = -rotation[1]

                # Rotate the data
                print("Rotating volume")
                data = rotate_array(sub_tomo, rotation, offset)

                # Add the contribution to a randomly chosen half average
                if random.getrandbits(1):
                    half_1 += data
                    num_1 += 1
                else:
                    half_2 += data
                    num_2 += 1

            # Average the sub tomograms
            print("Averaging half 1 with %d particles" % num_1)
            print("Averaging half 2 with %d particles" % num_2)
            if num_1 > 0:
                half_1 = half_1 / num_1
            if num_2 > 0:
                half_2 = half_2 / num_2

            # Save the averaged data
            print("Saving half 1 to %s" % half_1_filename)
            save_volume(half_1_filename, half_1, tomo_file.voxel_size)
            print("Saving half 2 to %s" % half_2_filename)
            save_volume(half_2_filename, half_2, tomo_file.voxel_size)