def crop_mrc(mrc_path, crop_path, x=0, y=0, z=0, dx=100, dy=100, dz=100,
             print_header_diff=False):
    """Crop specified part of a 3d mrc file.

    The cropped region is ``mrc.data[x:x + dx, y:y + dy, z:z + dz]``.
    Note that the axis order of mrc data is (x, y, z), different from that
    downstream tasks use, e.g. read_mrc_data and imod: (z, y, x).

    Arguments:
        mrc_path -- source mrc file path
        crop_path -- destination path of cropped mrc file

    Keyword Arguments:
        x {int} -- lowerbound x coordinate to crop (default: {0})
        y {int} -- lowerbound y coordinate to crop (default: {0})
        z {int} -- lowerbound z coordinate to crop (default: {0})
        dx {int} -- length of x to crop (default: {100})
        dy {int} -- length of y to crop (default: {100})
        dz {int} -- length of z to crop (default: {100})
        print_header_diff {bool} -- whether to print difference between the
            cropped and original header (default: {False})

    Raises:
        ValueError -- if a header sub-dict changes size between the original
            and the cropped file (inconsistent header metadata).
    """
    # Use mmap for faster reading of a large mrc file.
    with mrcfile.mmap(mrc_path, mode='r') as mrc, mrcfile.new(crop_path) as mrc_crop:
        # set_data automatically syncs header info with data.
        mrc_crop.set_data(mrc.data[x:x + dx, y:y + dy, z:z + dz])
        mrc_crop.voxel_size = mrc.voxel_size
    mrcfile.validate(crop_path)

    # Print header diff between source and cropped file.
    if print_header_diff:
        mrc_header = io_file.read_mrc_header(mrc_path)
        crop_header = io_file.read_mrc_header(crop_path)
        diffs = []
        for (k1, v1), (k2, v2) in zip(mrc_header.items(), crop_header.items()):
            if k1 == k2 and v1 != v2:
                if isinstance(v1, dict):
                    # Explicit raise instead of assert: asserts are stripped
                    # when Python runs with -O, which would silently skip
                    # this sanity check.
                    if len(v1) != len(v2):
                        raise ValueError("Different dict size: {}, {}".format(
                            len(v1), len(v2)))
                    diff_dict = {
                        k: (v1[k], v2[k])
                        for k in v1 if k in v2 and v1[k] != v2[k]
                    }
                    print("diff key: ", k1, "\ndiff_dict:\n", diff_dict)
                else:
                    print("diff key: ", k1, "\n", v1, "\n", v2, "\n")
                diffs.append((v1, v2))
        print("# diffs: ", len(diffs))
def main():
    """DoG particle-picking pipeline: read an MRC tomogram, pick peaks,
    filter redundant ones, and dump peak locations as JSON for 3dmod."""
    path, output = getParams()
    # Also, we can crop and only use part of the mrc image instead of
    # binning for tasks requiring higher resolution:
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)
    mrc_header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
    print("voxel_spacing_in_nm: %s" % voxel_spacing_in_nm)

    # In general, 7 is the optimal sigma1 value in nm according to the paper,
    # and sigma1 should be at least 2.
    sigma1 = 2
    try:
        sigma1 = max(int(7 / voxel_spacing_in_nm), sigma1)
    except ZeroDivisionError:
        # With our test data voxel_spacing_in_nm can be 0; fall back to the
        # minimum sigma1 instead of crashing. (Narrowed from a blanket
        # `except Exception: pass`, which hid unrelated errors.)
        pass
    print('sigma1=%d' % sigma1)

    # For a particular tomogram, a larger sigma1 value may give better
    # results. Use IMOD to display selected peaks and determine the best
    # sigma1. For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather
    # than 3 for better performance (in this tomogram, 7nm corresponds to
    # 3.84 pixels).
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)
    partition_op = {
        'nonoverlap_width': sigma1 * 20,
        'overlap_width': sigma1 * 10,
        'save_vg': False
    }
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=100)
    print("%d particles detected, containing redundant peaks" % len(result))

    # Remove redundant peaks.
    result = do_filter(pp=result, peak_dist_min=sigma1, op=None)
    print("peak number reduced to %d" % len(result))
    pprint(result[:5])

    # Generate file for 3dmod: one {'peak': {'loc': [...]}} record per peak.
    json_data = [{'peak': {'loc': [c.tolist() for c in r['x']]}}
                 for r in result]
    with open(output, 'w') as f:
        json.dump(json_data, f)
def main():
    """DoG picking demo on the cellular tomogram; displays the picked peaks
    with IMOD/3dmod."""
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_cellular_tomogram.mrc'
    # Also, we can crop and only use part of the mrc image instead of
    # binning for tasks requiring higher resolution:
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)
    mrc_header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
    # 7 is the optimal sigma1 value in nm according to the paper, and sigma1
    # should be at least 2.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)
    partition_op = {
        'nonoverlap_width': sigma1 * 20,
        'overlap_width': sigma1 * 10,
        'save_vg': False
    }
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=100)
    print("%d particles detected, containing redundant peaks" % len(result))
    # Remove redundant peaks.
    result = do_filter(pp=result, peak_dist_min=sigma1, op=None)
    print("peak number reduced to %d" % len(result))
    pprint(result[:5])

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    # NOTE(review): the original read the whole tomogram into an unused local
    # here (io_file.read_mrc_data(path)); dropped, since display_map_with_lines
    # is given map_file=path and the array was never used.

    # Generate file for 3dmod: one {'peak': {'loc': [...]}} record per peak.
    json_data = [{'peak': {'loc': [c.tolist() for c in r['x']]}}
                 for r in result]
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    dj = json_data
    x = N.zeros((len(dj), 3))
    for i, d in enumerate(dj):
        x[i, :] = N.array(d['peak']['loc'])
    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)
def select_sigma(self):
    """Pick the DoG sigma1 value for this tomogram.

    Derives the voxel spacing in nm from the MRC header (xlen / nx / 10 —
    presumably xlen is in Angstrom, hence the /10 to nm; confirm against the
    header writer), maps the paper's recommended 7 nm onto voxels, and clamps
    the result to a minimum of 2.
    """
    header = io_file.read_mrc_header(self.path)['MRC']
    spacing_nm = header['xlen'] / header['nx'] / 10
    # In general 7 nm is the optimal sigma1 per the paper; never go below 2.
    return max(7 / spacing_nm, 2)
def main():
    """End-to-end DoG picking demo on the single-particle tomogram:
    pick peaks, optionally dump subvolumes for the autoencoder, and write
    peak locations for display in IMOD/3dmod."""
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_single_particle_tomogram.mrc'
    # Also, we can crop and only use part of the mrc image instead of
    # binning for tasks requiring higher resolution:
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)
    header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = header['MRC']['xlen'] / header['MRC']['nx'] / 10
    # In general, 7 is the optimal sigma1 value in nm according to the paper,
    # and sigma1 should be at least 2.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    print('sigma1=%d' % sigma1)
    # For a particular tomogram, a larger sigma1 value may have better
    # results. Use IMOD to display selected peaks and determine the best
    # sigma1. For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather
    # than 3 for better performance (in this tomogram, 7nm corresponds to
    # 3.84 pixels).
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)
    partition_op = dict(nonoverlap_width=sigma1 * 20,
                        overlap_width=sigma1 * 10,
                        save_vg=False)
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=10,
                     pick_num=1000)
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input.
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        a = io_file.read_mrc_data(path)
        d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], result, 32)
        io_file.pickle_dump(d, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    # Optional: smooth the original image first --
    #   a = io_file.read_mrc_data(path)
    #   path = path[:-5] + '_smoothed' + path[-4:]
    #   temp = im_vol_util.cub_img(a)
    #   s1 = sigma1
    #   s2 = sigma1 * 1.1
    #   vg = dog_smooth(temp['vt'], s1, s2)
    #   # vg = smooth(temp['vt'], s1)
    #   TIM.write_data(vg, path)

    # Generate file for 3dmod: one {'peak': {'loc': [...]}} record per peak.
    json_data = [{'peak': {'loc': [c.tolist() for c in rec['x']]}}
                 for rec in result]
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    x = N.zeros((len(json_data), 3))
    for i, d in enumerate(json_data):
        x[i, :] = N.array(d['peak']['loc'])
    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)
for j in range(cluster_center_number)]) for i in range(0, b.shape[0]): for j in range(0, b.shape[1]): for k in range(0, b.shape[2]): sum_f[label[i][j][k]][1] = sum_f[label[i][j][k]][1] + 1 sum_f[label[i][j] [k]][0] = sum_f[label[i][j][k]][0] + b[i][j][k] for i in range(cluster_center_number): assert sum_f[i][1] > 0 result[i] = sum_f[i][0] / sum_f[i][1] return result if __name__ == "__main__": path = './aitom_demo_single_particle_tomogram.mrc' # file path mrc_header = io_file.read_mrc_header(path) a = io_file.read_mrc_data(path) # volume data assert a.shape[0] > 0 a = a.astype(np.float32) print("file has been read, shape is", a.shape) start_time = time.time() saliency_detection(a=a, gaussian_sigma=2.5, gabor_sigma=14.0, gabor_lambda=13.0, cluster_center_number=10000, multiprocessing_num=0, pick_num=1000, save_flag=True) end_time = time.time() print('saliency detection takes', end_time - start_time, 's')