def picking(path, s1, s2, t, find_maxima=True, partition_op=None, multiprocessing_process_num=0):
    """Detect peaks in a tomogram with DoG and keep the leading, above-threshold ones.

    The threshold is ``T = m + t * (M - m) / 20``, where ``M`` and ``m`` are
    the ``'val'`` of the first and of the last peak returned by
    ``peak__partition``.

    Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron
    Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment
    of Particle Picking. BMC Bioinformatics. 2016; 17: 405.

    parameters:
        path: path of the MRC file to read
        s1, s2: forwarded to ``peak__partition`` (DoG sigmas)
        t: threshold level used in the formula above
        find_maxima, partition_op, multiprocessing_process_num: forwarded
            unchanged to ``peak__partition``

    return:
        a leading slice of the peak list, or the whole list when no peak is
        below the threshold
    """
    a = io_file.read_mrc_data(path)
    print("file has been read")
    a_vt = im_vol_util.cub_img(a)['vt']  # volume data

    # using DoG to detect all peaks, may contain peaks caused by noise
    peaks = peak__partition(
        a_vt,
        s1=s1,
        s2=s2,
        find_maxima=find_maxima,
        partition_op=partition_op,
        multiprocessing_process_num=multiprocessing_process_num)

    # calculate threshold T and delete peaks whose val are smaller than threshold
    # NOTE(review): M / m are the max / min only if peaks are sorted by
    # descending 'val' -- confirm the ordering peak__partition produces.
    M = peaks[0]['val']
    m = peaks[-1]['val']
    T = m + t * (M - m) / 20

    # Keep everything when no peak is below T; previously `res` stayed
    # unassigned in that case and the return raised UnboundLocalError.
    res = peaks
    for i, peak in enumerate(peaks):
        if peak['val'] < T:
            # NOTE(review): the `i - 1` bound also drops the last peak that
            # still meets T and, for i == 0, wraps around to peaks[:-1].
            # Left unchanged because tightening it to peaks[:i] alters what
            # existing callers receive -- confirm the intended cut-off.
            res = peaks[0:i - 1]
            break

    print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" % (T, m, t, M))
    return res
def view_tomo(self, sigma=2, R=10):
    """Plot every z-slice of the smoothed tomogram with nearby picked centers circled.

    parameters:
        sigma: passed to ``smooth`` to denoise the tomogram before display
        R: radius used for the circles; a center is drawn on a slice when its
            third coordinate is less than R slices away, with the circle
            radius shrinking as ``sqrt(R**2 - dz**2)``

    Side effects: sets ``self.centers`` and ``self.uuids`` from the pickled
    subvolumes, changes ``plt.rcParams['figure.figsize']`` and creates one
    matplotlib figure per slice.
    """
    # d = {v_siz:(32,32,32), vs:{uuid0:{center, v, id}, uuid1:{center, v, id} ... }}
    subvols_loc = os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle")
    d = io_file.pickle_load(subvols_loc)
    a = io_file.read_mrc_data(self.path)

    # The former guard `'self.centers' not in dir()` was always true (dir()
    # lists local names, never 'self.centers'), so the lists were rebuilt on
    # every call.  Do that explicitly.
    picked = [(k, v['center']) for k, v in d['vs'].items() if v['v'] is not None]
    self.centers = [center for _, center in picked]
    self.uuids = [k for k, _ in picked]

    # denoise
    a_smooth = smooth(a, sigma)

    # Convert once instead of once per slice; an empty list gets an empty
    # z-column so slices are still shown (without circles).
    centers = np.array(self.centers)
    z_all = centers[:, 2] if len(centers) else np.zeros(0)

    plt.rcParams['figure.figsize'] = (15.0, 12.0)
    for slice_num in range(a_smooth.shape[2]):
        dz = z_all - slice_num
        near = dz ** 2 < R ** 2

        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.axis('off')
        for center, offset in zip(centers[near], dz[near]):
            y, x = center[0:2]
            r = np.sqrt(R ** 2 - offset ** 2)
            ax.add_artist(plt.Circle((x, y), r, color='b', fill=False))
        ax.imshow(a_smooth[:, :, slice_num], cmap='gray')
def view_subtom(self, subvol_num, sigma=2, R=10):
    """Render the slice containing one picked center, circle it and save the plot.

    parameters:
        subvol_num: index into the list of centers whose subvolume is not None
        sigma: passed to ``smooth`` to denoise the tomogram before display
        R: radius of the circle drawn around the center

    return:
        the path of the saved image ('tmp_sub.png')

    Side effects: sets ``self.centers`` and ``self.uuids`` from the pickled
    subvolumes, changes ``plt.rcParams['figure.figsize']`` and writes the
    image file.
    """
    subvols_loc = os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle")
    d = io_file.pickle_load(subvols_loc)
    a = io_file.read_mrc_data(self.path)

    # denoise
    a_smooth = smooth(a, sigma)

    # The former guard `'self.centers' not in dir()` was always true (dir()
    # lists local names, never 'self.centers'), so the lists were rebuilt on
    # every call.  Do that explicitly.
    picked = [(k, v['center']) for k, v in d['vs'].items() if v['v'] is not None]
    self.centers = [center for _, center in picked]
    self.uuids = [k for k, _ in picked]

    y, x, z = self.centers[subvol_num]
    img = a_smooth[:, :, z]

    plt.rcParams['figure.figsize'] = (10.0, 8.0)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.add_artist(plt.Circle((x, y), R, color='b', fill=False))
    plt.axis('off')
    output_str = '%d of %d, uuid = %s' % (
        subvol_num, len(self.centers), self.uuids[subvol_num])
    plt.title(output_str)
    ax.imshow(img, cmap='gray')

    save_path = 'tmp_sub.png'
    # plt.imsave(save_path, img, cmap='gray')
    plt.savefig(save_path)
    return save_path
def picking(path, s1, s2, t, find_maxima=True, partition_op=None, multiprocessing_process_num=0, pick_num=None):
    '''
    Pick particles from a tomogram: DoG peak detection, thresholding,
    removal of redundant peaks and optional truncation to pick_num.

    parameters:
        path: file path
        s1: sigma1
        s2: sigma2
        t: threshold level
        find_maxima: peaks appear at the maximum/minimum
        multiprocessing_process_num: number of processes used for multiprocessing
        partition_op: partition the volume for multithreading; a dict
            containing 'nonoverlap_width', 'overlap_width' and 'save_vg'
        pick_num: the max number of particles to pick out

    # Take a two-dimensional image as an example, if the image size is
    # 210*150 (all in pixels), nonoverlap_width is 60 and overlap_width is 30.
    # It will be divided into 6 pieces for different threads to process.
    # The ranges of their X and Y are
    # (first line)  (0-90)*(0-90)   (60-150)*(0-90)   (120-210)*(0-90)
    # (second line) (0-90)*(60-150) (60-150)*(60-150) (120-210)*(60-150)

    In general, s2=1.1*s1, s1 and t depend on particle size and noise.
    In practice, s1 should be roughly equal to the particle radius (in pixels).
    In the related paper, the model achieves the highest comprehensive score
    when s1=7 and t=3.

    Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron
    Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment
    of Particle Picking. BMC Bioinformatics. 2016; 17: 405.

    return:
        a list of the peaks left after do_filter (at most pick_num of them);
        each element looks like:
        {'val': 281.4873046875, 'x': [1178, 1280, 0], 'uuid': '6ad66107-088c-471e-b65f-0b3b2fdc35b0'}
        'val' is the score of the peak when picking,
        'x' is the center of the peak in the tomogram,
        'uuid' is a unique id for each peak.
    '''
    a = io_file.read_mrc_data(path)
    print("file has been read")
    a_vt = im_vol_util.cub_img(a)['vt']  # volume data

    # using DoG to detect all peaks, may contain peaks caused by noise
    peaks = peak__partition(
        a_vt,
        s1=s1,
        s2=s2,
        find_maxima=find_maxima,
        partition_op=partition_op,
        multiprocessing_process_num=multiprocessing_process_num)

    # calculate threshold T and delete peaks whose val are smaller than threshold
    M = peaks[0]['val']  # val of the first peak
    m = peaks[len(peaks) - 1]['val']  # val of the last peak
    T = m + t * (M - m) / 20

    # NOTE(review): multiplying by the boolean find_maxima turns every key
    # into zero when find_maxima is False, and the trailing `- 1` drops one
    # more peak than the bisect position; both are kept as found because
    # callers currently receive exactly this selection -- confirm the intent.
    peak_vals_neg = [-peak['val'] * find_maxima for peak in peaks]
    res = peaks[:bisect(peak_vals_neg, -T * find_maxima) - 1]
    assert res[-1]['val'] >= T
    print("%d particles detected, containing redundant peaks" % len(res))

    # remove redundant peaks; the filtered list used to be printed and then
    # discarded, so the function returned the unfiltered peaks.
    # NOTE(review): assumes do_filter returns the surviving peaks as a
    # sliceable sequence in priority order -- confirm.
    res = do_filter(pp=res, peak_dist_min=s1, op=None)
    print("peak number reduced to %d" % len(res))

    if pick_num is not None and pick_num < len(res):
        res = res[:pick_num]

    print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" % (T, m, t, M))
    return res
def dump_subvol(self, picking_result):
    """Extract subvolumes for the picked peaks and pickle them in ``self.dump_path``.

    parameters:
        picking_result: peaks handed to ``peaks_to_subvolumes`` together with
            the 'vt' volume of the tomogram at ``self.path`` and the size 32
    """
    from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes

    out_path = os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle")
    tomogram = io_file.read_mrc_data(self.path)
    volume = im_vol_util.cub_img(tomogram)['vt']
    subvolumes = peaks_to_subvolumes(volume, picking_result, 32)
    io_file.pickle_dump(subvolumes, out_path)
    print("Save subvolumes .pickle file to:", out_path)
def particle_picking(mrc_header):
    """Run DoG picking with a sigma derived from the voxel spacing and dump subvolumes.

    NOTE(review): `voxel_spacing_in_nm` and `path` are not defined inside this
    function and `mrc_header` is not read by any active line; presumably the
    two names are provided by the enclosing module -- confirm.
    """
    # In general, 7 is optimal sigma1 val in nm according to the paper and
    # sigma1 should at least be 2
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    print('sigma1=%d' % sigma1)
    # For particular tomogram, larger sigma1 value may have better results.
    # Use IMOD to display selected peaks and determine best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for
    # better performance (in this tomogram, 7nm corresponds to 3.84 pixels)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = dict(
        nonoverlap_width=sigma1 * 20,
        overlap_width=sigma1 * 10,
        save_vg=False,
    )
    result = picking(
        path,
        s1=sigma1,
        s2=sigma1 * 1.1,
        t=3,
        find_maxima=False,
        partition_op=partition_op,
        multiprocessing_process_num=10,
        pick_num=1000,
    )
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if not dump_subvols:
        return

    # use later for autoencoder
    subvols_loc = "demo_single_particle_subvolumes.pickle"
    from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
    tomogram = io_file.read_mrc_data(path)
    subvolumes = peaks_to_subvolumes(im_vol_util.cub_img(tomogram)['vt'], result, 32)
    io_file.pickle_dump(subvolumes, subvols_loc)
    print("Save subvolumes .pickle file to:", subvols_loc)
def __init__(self, path, rescale_size=0.2, figsize=(6, 6), cmap='gray'):
    """Load an MRC volume, rescale it and start the interactive viewer.

    parameters:
        path: MRC file to read
        rescale_size: factor handed to ``rescale`` (anti-aliasing disabled)
        figsize, cmap: stored on the instance for later use
    """
    # rescale image
    raw_volume = io_file.read_mrc_data(path)
    self.rescale_size = rescale_size
    self.volume = rescale(raw_volume, self.rescale_size, anti_aliasing=False)

    self.figsize = figsize
    self.cmap = cmap
    # [min, max] of the rescaled volume
    self.v = [reduce_fn(self.volume) for reduce_fn in (np.min, np.max)]

    figure, axes = plt.subplots()
    self.fig = figure
    self.current_ax = axes
    self.annotations = {}
    ipyw.interact(self.view_selection)
def convert(op):
    """
    functions to convert pdb structures to density maps

    convert pdb file to a volume file using Situs's pdb2vol

    parameters:
        op: dict read for 'pdb_id', 'spacing', 'resolution', 'pdb_file' and
            'situs_pdb2vol_program'

    return:
        the same dict, with the converted volume stored under op['map'] as a
        float array

    raises:
        AssertionError if op['pdb_file'] is not an existing file
    """
    import os
    import subprocess
    import tempfile

    import aitom.io.file as TIF

    # Validate before creating the temporary file so a bad path leaves
    # nothing behind.
    assert os.path.isfile(op['pdb_file'])

    [fh, out_fn] = tempfile.mkstemp(
        prefix='tmp-%s-%d-%d-' % (op['pdb_id'], op['spacing'], op['resolution']),
        suffix='.mrc')
    os.close(fh)

    # Answers to pdb2vol's interactive prompts, one per line, in order.
    answers = (
        2,  # Do you want to mass-weight the atoms ? 1: No 2: Yes
        1,  # Do you want to select atoms based on a B-factor threshold? 1: No 2: Yes
        op['spacing'],  # Please enter the desired voxel spacing for the output map (in Angstrom):
        # Kernel width. Please enter (in Angstrom): (as pos. value) kernel
        # half-max radius or (as neg. value) target resolution (2 sigma)
        -op['resolution'],
    )
    # Remaining prompts are intentionally left unanswered, as before:
    # print(1, file=proc.stdin)  # Please select the type of smoothing kernel: 1: Gaussian, exp(-1.5 r^2 / sigma^2)
    #                            # 2: Triangular, max(0, 1 - 0.5 |r| / r-half) 3: Semi-Epanechnikov, max(0, 1 - 0.5 |r|^1.5 / r-half^1.5)
    #                            # 4: Epanechnikov, max(0, 1 - 0.5 r^2 / r-half^2) 5: Hard Sphere, max(0, 1 - 0.5 r^60 / r-half^60)
    # print(1, file=proc.stdin)  # Do you want to correct for lattice interpolation smoothing effects? 1: Yes (slightly lowers the kernel width to maintain target resolution) 2: No
    # print(1, file=proc.stdin)  # Finally, please enter the desired kernel amplitude (scaling factor):

    try:
        cmd = [str(op['situs_pdb2vol_program']), op['pdb_file'], out_fn]
        proc = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=False,
                                universal_newlines=True)
        # communicate() writes the answers, closes stdin and drains both
        # output pipes until the program exits.
        proc.communicate(input=''.join(str(answer) + '\n' for answer in answers))

        op['map'] = TIF.read_mrc_data(out_fn).astype('float')
    finally:
        # Always delete the temporary map, also when pdb2vol or the read fails.
        os.remove(out_fn)

    print('pdb_id', op['pdb_id'], 'map size:', op['map'].shape, 'mean:',
          op['map'].mean(), 'std:', op['map'].std())
    return op
def main():
    """Pick particles in the demo cellular tomogram, filter them and show them in 3dmod.

    Writes the peak locations to 'data_json_file.json' in the current
    directory before displaying them.
    """
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_cellular_tomogram.mrc'

    # Also, we can crop and only use part of the mrc image instead of binning
    # for tasks requiring higher resolution
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    mrc_header = io_file.read_mrc_header(path)
    # presumably xlen is in Angstrom, hence the / 10 to get nm -- TODO confirm
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
    # 7 is optimal sigma1 val in nm according to the paper and sigma1 should
    # at least be 2
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = {
        'nonoverlap_width': sigma1 * 20,
        'overlap_width': sigma1 * 10,
        'save_vg': False
    }
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=100)
    print("%d particles detected, containing redundant peaks" % len(result))
    result = do_filter(pp=result, peak_dist_min=sigma1, op=None)  # remove redundant peaks
    print("peak number reduced to %d" % len(result))
    pprint(result[:5])

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    # (The tomogram used to be read a second time here into a variable that
    # was never used; that redundant read has been dropped.)

    # generate file for 3dmod; each coordinate is converted with .tolist()
    # so that it is JSON serialisable
    json_data = [
        {'peak': {'loc': [coord.tolist() for coord in peak['x']]}}
        for peak in result
    ]
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    x = N.zeros((len(json_data), 3))
    for i, d in enumerate(json_data):
        x[i, :] = N.array(d['peak']['loc'])

    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)
def main():
    """Pick particles in the demo single-particle tomogram, dump subvolumes and show the peaks.

    Writes 'demo_single_particle_subvolumes.pickle' and 'data_json_file.json'
    to the current directory.
    """
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_single_particle_tomogram.mrc'

    # Also, we can crop and only use part of the mrc image instead of binning
    # for tasks requiring higher resolution
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = header['MRC']['xlen'] / header['MRC']['nx'] / 10
    # In general, 7 is optimal sigma1 val in nm according to the paper and
    # sigma1 should at least be 2
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    print('sigma1=%d' % sigma1)
    # For particular tomogram, larger sigma1 value may have better results.
    # Use IMOD to display selected peaks and determine best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for
    # better performance (in this tomogram, 7nm corresponds to 3.84 pixels)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = dict(
        nonoverlap_width=sigma1 * 20,
        overlap_width=sigma1 * 10,
        save_vg=False,
    )
    result = picking(
        path,
        s1=sigma1,
        s2=sigma1 * 1.1,
        t=3,
        find_maxima=False,
        partition_op=partition_op,
        multiprocessing_process_num=10,
        pick_num=1000,
    )
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        tomogram = io_file.read_mrc_data(path)
        subvolumes = peaks_to_subvolumes(im_vol_util.cub_img(tomogram)['vt'], result, 32)
        io_file.pickle_dump(subvolumes, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)

    # Optional: smooth original image
    # a = io_file.read_mrc_data(path)
    # path = path[:-5] + '_smoothed' + path[-4:]
    # temp = im_vol_util.cub_img(a)
    # s1 = sigma1
    # s2 = sigma1 * 1.1
    # vg = dog_smooth(temp['vt'], s1, s2)
    # # vg = smooth(temp['vt'], s1)
    # TIM.write_data(vg, path)

    # generate file for 3dmod
    json_data = []
    for peak in result:
        location = [coord.tolist() for coord in peak['x']]
        json_data.append({'peak': {'loc': location}})
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    coords = N.zeros((len(json_data), 3))
    for row, entry in enumerate(json_data):
        coords[row, :] = N.array(entry['peak']['loc'])

    lines = generate_lines(x_full=coords, rad=sigma1)
    display_map_with_lines(l=lines, map_file=path)
def contour_shrink(img):
    """Return a copy of ``img`` where 2-valued neighbours of interior 1-pixels become 1.

    For every pixel that equals 1 and does not lie on the border of the 2-D
    array, each of its four direct neighbours (up, down, left, right) that
    equals 2 in ``img`` is set to 1 in the result.  All tests are made
    against the unmodified input, so the change spreads by exactly one
    pixel per call.  ``img`` itself is not modified.

    parameters:
        img: 2-D numpy array

    return:
        a new array of the same shape and dtype
    """
    result = img.copy()

    # Interior pixels (border excluded) that equal 1.
    interior_ones = img[1:-1, 1:-1] == 1

    # Row/column slices that line up each interior pixel with one neighbour.
    neighbour_slices = (
        (slice(None, -2), slice(1, -1)),  # (i - 1, j)
        (slice(2, None), slice(1, -1)),   # (i + 1, j)
        (slice(1, -1), slice(None, -2)),  # (i, j - 1)
        (slice(1, -1), slice(2, None)),   # (i, j + 1)
    )
    for rows, cols in neighbour_slices:
        flip = interior_ones & (img[rows, cols] == 2)
        # Basic slicing yields a view, so this writes into `result`.
        result[rows, cols][flip] = 1
    return result


if __name__ == '__main__':
    path = "/ldap_shared/home/v_zhenxi_zhu/membrane/aitom/membrane.mrc"  # file path
    v = AIF.read_mrc_data(path)
    mask_v = active_contour_slice(v,
                                  sigma=3.5,
                                  membrane_thickness=5,
                                  display_slice=10,
                                  out_dir='./output',
                                  save_flag=True)
    unique, counts = np.unique(mask_v, return_counts=True)
    print(
        'mask volume contains(1=membrance voxels, 2=voxels in the membrane, 3=voxels outside the membrane):\n',
        dict(zip(unique, counts)))
for i in range(0, b.shape[0]): for j in range(0, b.shape[1]): for k in range(0, b.shape[2]): sum_f[label[i][j][k]][1] = sum_f[label[i][j][k]][1] + 1 sum_f[label[i][j] [k]][0] = sum_f[label[i][j][k]][0] + b[i][j][k] for i in range(cluster_center_number): assert sum_f[i][1] > 0 result[i] = sum_f[i][0] / sum_f[i][1] return result if __name__ == "__main__": path = './aitom_demo_single_particle_tomogram.mrc' # file path mrc_header = io_file.read_mrc_header(path) a = io_file.read_mrc_data(path) # volume data assert a.shape[0] > 0 a = a.astype(np.float32) print("file has been read, shape is", a.shape) start_time = time.time() saliency_detection(a=a, gaussian_sigma=2.5, gabor_sigma=14.0, gabor_lambda=13.0, cluster_center_number=10000, multiprocessing_num=0, pick_num=1000, save_flag=True) end_time = time.time() print('saliency detection takes', end_time - start_time, 's')
"""
Load Volume and Display image

change to parent directory to use aitom library
"""
import os
# Move the working directory one level up before importing aitom; per the
# docstring above this is what makes the aitom package usable from here.
os.chdir("..")

# load mrc file using io module
# example data:
# http://ftp.ebi.ac.uk/pub/databases/empiar/archive/10045/data/ribosomes/AnticipatedResults/Particles/Tomograms/05/IS002_291013_005_subtomo000001.mrc
import aitom.io.file as IF
# The relative path is resolved against the directory entered above.
a = IF.read_mrc_data("data/IS002_291013_005_subtomo000001.mrc")

# denoising using gaussian filter for visualization
from aitom.filter.gaussian import smooth
a = smooth(a, sigma=8)

# display image using image module
import aitom.image.vol.util as IVU
''' a_im is a dict: 'im': image data, type of numpy.ndarray, elements in [0, 1] 'vt': volume data '''
a_im = IVU.cub_img(a)
# Quick inspection of the 'im' entry: its type, its shape and one element.
print(type(a_im['im']))
print(a_im['im'].shape)
print(a_im['im'][1][1])

import matplotlib.pyplot as plt
''' import os os.chdir("..") ''' load mrc file using io module example data: http://ftp.ebi.ac.uk/pub/databases/empiar/archive/10045/data/ribosomes/AnticipatedResults/Particles/Tomograms/05/IS002_291013_005_subtomo000001.mrc ''' import aitom.io.file as io_file a = io_file.read_mrc_data("data/IS002_291013_005_subtomo000001.mrc") # denoising using gaussian filter for visualization from aitom.filter.gaussian import smooth a = smooth(a, sigma=8) # display image using image module import aitom.image.vol.util as im_vol_util a_im = im_vol_util.cub_img(a) ''' a_im is a dict: 'im': image data 'vt': volume data