def dump_subvol(self, picking_result):
    from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
    subvols_loc = os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle")
    a = io_file.read_mrc_data(self.path)
    d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], picking_result, 32)
    io_file.pickle_dump(d, subvols_loc)
    print("Save subvolumes .pickle file to:", subvols_loc)

def load_dict(path):
    if not os.path.isfile(path):
        d = {}
        AIF.pickle_dump(d, path)
    else:
        d = AIF.pickle_load(path)
    return d

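# load_dict implements the checkpoint pattern used by average() and classify() below:
# each pass stores its result under its pass index, so an interrupted run can resume.
# Minimal usage sketch; the checkpoint path and the stored value are hypothetical,
# and the same imports as above (os, AIF) are assumed.
djs = load_dict('./tmp/out/djs.pickle')      # {} on first run, previous passes otherwise
if 0 not in djs:
    djs[0] = [{'subtomogram': 'some-uuid'}]  # placeholder result for pass 0
    AIF.pickle_dump(djs, './tmp/out/djs.pickle')  # persist so the run can be resumed
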
def combine_subtom(out_dir, pickle_path):
    subvols_loc = os.path.join(out_dir, 'selected_demo_single_particle_subvolumes.pickle')
    pickle_data = AIF.pickle_load(pickle_path)
    d = AIF.pickle_load(subvols_loc)
    subvols = []
    for v in d['vs'].values():
        if v['v'] is not None:
            subvols.append(v['v'])
    subtom = pickle_data['1KP8_data'] + pickle_data['1KP8_data'] + subvols[:100]
    print('Total subtomograms: ', len(subtom))

    subvols_loc = os.path.join(out_dir, 'subvolumes.pickle')
    d = {}
    d['v_siz'] = np.array([32, 32, 32])
    d['vs'] = {}
    labels = {}
    for i in range(len(subtom)):
        uuid_i = str(uuid.uuid4())
        d['vs'][uuid_i] = {}
        d['vs'][uuid_i]['center'] = None
        d['vs'][uuid_i]['id'] = uuid_i
        d['vs'][uuid_i]['v'] = subtom[i]
        d['vs'][uuid_i]['label'] = int(i / 100)
    AIF.pickle_dump(d, subvols_loc)
    print("Save subvolumes .pickle file to:", subvols_loc)

def particle_picking(mrc_header):
    # Note: voxel_spacing_in_nm and path are assumed to be available in the enclosing scope;
    # see main() below and the setup sketch that follows this function.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)  # 7 nm is the optimal sigma1 value according to the paper; sigma1 should be at least 2
    print('sigma1=%d' % sigma1)
    # For a particular tomogram, a larger sigma1 value may give better results.
    # Use IMOD to display the selected peaks and determine the best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance
    # (in this tomogram, 7 nm corresponds to 3.84 pixels).
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)
    partition_op = {'nonoverlap_width': sigma1 * 20, 'overlap_width': sigma1 * 10, 'save_vg': False}
    result = picking(path, s1=sigma1, s2=sigma1 * 1.1, t=3, find_maxima=False, partition_op=partition_op,
                     multiprocessing_process_num=10, pick_num=1000)
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        a = io_file.read_mrc_data(path)
        d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], result, 32)
        io_file.pickle_dump(d, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)

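# Setup sketch for particle_picking(): voxel_spacing_in_nm and path must exist before the call.
# The derivation mirrors main() below; the file path here is a placeholder.
path = 'aitom_demo_single_particle_tomogram.mrc'
mrc_header = io_file.read_mrc_header(path)
# xlen is in Angstrom; divide by the voxel count and by 10 to get nm per voxel
voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
particle_picking(mrc_header)
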
def select(self, remove_particles, pick_num):
    d = io_file.pickle_load(os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle"))
    subvols_loc = os.path.join(self.dump_path, "selected_demo_single_particle_subvolumes.pickle")
    particles_num = pick_num
    result = {}
    result['v_siz'] = d['v_siz']
    result['vs'] = {}
    remove_particles = np.array(remove_particles)
    # d = {v_siz: (32, 32, 32), vs: {uuid0: {center, v, id}, uuid1: {center, v, id}, ...}}
    for i in range(len(self.centers)):
        if i in remove_particles:
            continue
        uuid_i = self.uuids[i]
        result['vs'][uuid_i] = d['vs'][uuid_i]
        if len(result['vs']) >= particles_num:
            break
    assert len(result['vs']) == particles_num
    # subvols_loc = './tmp/picking/selected_demo_single_particle_subvolumes.pickle'
    AIF.pickle_dump(result, subvols_loc)
    print("Save subvolumes .pickle file to:", subvols_loc)

def average(dj_init=None, img_db=None, djs_file=None, avgs_file=None, pcas_file=None, op=None):
    djs = load_dict(op['data_checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])
    dj = djs[-1]

    for pass_i in range(op['option']['pass_num']):
        print('pass_i', pass_i)
        if pass_i in djs:
            dj = djs[pass_i]
            continue
        dj = copy.deepcopy(dj)  # make a copy of the previous pass, for an update

        c = str(uuid.uuid4())
        avg_t = vol_avg(dj=dj, op=op['average'], img_db=img_db)
        avgs[c] = avg_t
        avgs[c]['pass_i'] = pass_i
        avgs[c]['id'] = c
        AIF.pickle_dump(avgs, op['average']['checkpoint'])
        print('averaging done')

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']
        print('re-align done')

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])

def EM(img_data, K, iteration, path, snapshot_interval=5, reg=False, use_voronoi=True):
    """ The main expectation-maximization (EM) algorithm """
    np.seterr(all='ignore')
    X = get_image_db(img_data['db_path'])
    dj = img_data['dj']
    N = len(dj)
    n_x, n_y, n_z = X[dj[0]['v']].shape

    theta = dict()
    theta['N'] = N
    theta['J'] = n_x * n_y * n_z
    theta['n'] = n_x
    theta['K'] = K
    # Proportional to the radius of the image
    theta['xi'] = theta['n']
    # We need to initialize this later
    theta['A'] = np.zeros([K, n_x, n_y, n_z], dtype=np.complex128)
    theta['alpha'] = np.ones([K], dtype=np.float_) / K
    theta['trans_list'] = None
    theta['predictions'] = np.zeros([N])

    # Print relevant information
    print("Running model based alignment: N=%d, K=%d, dimensions=(%d,%d,%d)" % (N, K, n_x, n_y, n_z))
    if reg:
        print("With regularization")
    else:
        print("Without regularization")
    if use_voronoi:
        print("With voronoi weights")
    else:
        print("Without voronoi weights")

    # Regularization
    reg_step = (float(N) / K**2) / 2
    theta['theta_reg'] = 5 * reg_step if reg else 0

    # Sample K random data points from the set to initialize A
    indices = np.random.permutation(N)
    num_models = [0 for _ in range(K)]
    k = 0
    for i in range(N):
        theta['A'][k] += X[dj[indices[i]]['v']] * X[dj[indices[i]]['m']]
        num_models[k] += 1
        k = (k + 1) % K
    for k in range(K):
        theta['A'][k] /= num_models[k]

    # Get a random A_k and a random X_i and calculate sum_j to get sigma_sq
    k = np.random.randint(K)
    i = np.random.randint(N)
    sum_j = np.sum(np.square(np.absolute(theta['A'][k] - X[dj[i]['v']]) * X[dj[i]['m']]))
    theta['sigma_sq'] = sum_j / theta['J']
    print("Sigma_sq initialized to %d" % theta['sigma_sq'])

    checkpoint_dir = os.path.join(path, 'checkpoints')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    interval = snapshot_interval

    for i in range(iteration):
        checkpoint_file = os.path.join(checkpoint_dir, '%08d.pickle' % i)
        if os.path.exists(checkpoint_file):
            checkpoint_data = AIF.pickle_load(checkpoint_file)
            theta = checkpoint_data['theta']
            continue

        if i % interval == 0:
            output_images(theta, i, path=path)

        print("Running iteration %d" % (i + 1))
        # Update alpha before updating A
        compute_trans_list(theta=theta, img_data=img_data, use_voronoi=use_voronoi)
        alpha = update_alpha(img_data=img_data, theta=theta, use_voronoi=use_voronoi)
        print("Alpha updated! Alpha = ", end=' ')
        print(alpha.tolist())
        sigma_sq = update_sigma(img_data=img_data, theta=theta, reg=reg, use_voronoi=use_voronoi)
        print("Sigma updated! Sigma^2 = ", end=' ')
        print(sigma_sq)
        xi = update_xi(img_data=img_data, theta=theta, use_voronoi=use_voronoi)
        print("Xi updated! Xi = ", end=' ')
        print(xi)
        A = update_a(img_data=img_data, theta=theta, alpha=alpha, reg=reg, use_voronoi=use_voronoi)
        print("A updated! Average intensity of A = ", end=' ')
        print(np.average(A, (1, 2, 3)))

        theta['alpha'] = alpha
        theta['sigma_sq'] = sigma_sq
        theta['xi'] = xi
        theta['A'] = A
        # Since we changed the models A, the list of optimal transforms
        # needs to be re-calculated
        theta['trans_list'] = None
        theta['pred'] = None

        # Decrease the regularization coefficient
        if reg and theta['theta_reg'] > 0:
            theta['theta_reg'] -= reg_step
            theta['theta_reg'] = max(0, theta['theta_reg'])

        try:
            assert not os.path.exists(checkpoint_file)
        except:
            raise Exception("Checkpoint file already exists!")
        AIF.pickle_dump({'theta': theta}, checkpoint_file)

    print_prediction_results(theta, img_data)
    output_images(theta, iteration, path=path)
    print("Prediction from model: ", end=' ')
    print(theta['predictions'])
    return theta

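# Call sketch for EM(): the function expects img_data to carry the image-database path
# and a list dj whose entries name a subtomogram key 'v' and a mask key 'm' in that database.
# The db_path, keys, and parameter values below are illustrative assumptions, not fixed API values.
img_data = {
    'db_path': './tmp/image.db',                       # passed to get_image_db()
    'dj': [{'v': 'subtom-uuid-0', 'm': 'mask-uuid'},   # subtomogram / mask keys in the db
           {'v': 'subtom-uuid-1', 'm': 'mask-uuid'}],
}
theta = EM(img_data, K=2, iteration=20, path='./tmp/em-out',
           snapshot_interval=5, reg=False, use_voronoi=True)
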
def main():
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_single_particle_tomogram.mrc'
    # Alternatively, we can crop and use only part of the mrc image instead of binning,
    # for tasks requiring higher resolution:
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    mrc_header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)  # 7 nm is the optimal sigma1 value according to the paper; sigma1 should be at least 2
    print('sigma1=%d' % sigma1)
    # For a particular tomogram, a larger sigma1 value may give better results.
    # Use IMOD to display the selected peaks and determine the best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance
    # (in this tomogram, 7 nm corresponds to 3.84 pixels).
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = {'nonoverlap_width': sigma1 * 20, 'overlap_width': sigma1 * 10, 'save_vg': False}
    result = picking(path, s1=sigma1, s2=sigma1 * 1.1, t=3, find_maxima=False, partition_op=partition_op,
                     multiprocessing_process_num=10, pick_num=1000)
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        a = io_file.read_mrc_data(path)
        d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], result, 32)
        io_file.pickle_dump(d, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    '''
    # Optional: smooth the original image
    a = io_file.read_mrc_data(path)
    path = path[:-5] + '_smoothed' + path[-4:]
    temp = im_vol_util.cub_img(a)
    s1 = sigma1
    s2 = sigma1 * 1.1
    vg = dog_smooth(temp['vt'], s1, s2)
    # vg = smooth(temp['vt'], s1)
    TIM.write_data(vg, path)
    '''
    json_data = []  # generate file for 3dmod
    for i in range(len(result)):
        loc_np = result[i]['x']
        loc = []
        for j in range(len(loc_np)):
            loc.append(loc_np[j].tolist())
        json_data.append({'peak': {'loc': loc}})
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    dj = json_data
    x = N.zeros((len(dj), 3))
    for i, d in enumerate(dj):
        x[i, :] = N.array(d['peak']['loc'])
    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)

def classify(dj_init=None, img_db=None, djs_file=None, avgs_file=None, pcas_file=None, op=None):
    """
    classify

    parameters:
        dj_init: a list of dicts, where each element looks like:
            {'subtomogram': v_id, 'mask': mask_id, 'angle': ang_t, 'loc': loc_t, 'model_id': model_id}
        img_db: a dict to find subtomogram data by its uuid (img_db[uuid] is a 3D np array)

    result (pickle file): average results of each class
    """
    djs = load_dict(op['data_checkpoint'])
    pcas = load_dict(op['dim_reduction']['pca']['checkpoint'])
    clus = load_dict(op['clustering']['checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])
    dj = djs[-1]

    for pass_i in range(op['option']['pass_num']):
        if pass_i in djs:
            dj = djs[pass_i]
            continue
        # make a copy of the previous pass, for an update
        dj = copy.deepcopy(dj)

        if pass_i not in pcas:
            red = covariance_filtered_pca(dj=dj, img_db=img_db, templates=avgs, pca_op=op['dim_reduction']['pca'])
            # print(type(red))
            pcas[pass_i] = red
            AIF.pickle_dump(pcas, op['dim_reduction']['pca']['checkpoint'])
        else:
            red = copy.deepcopy(pcas[pass_i])

        if pass_i not in clus:
            lbl = kmeans_clustering(x=red, k=op['clustering']['kmeans_k'])
            clus[pass_i] = lbl
            AIF.pickle_dump(clus, op['clustering']['checkpoint'])
        else:
            lbl = clus[pass_i]
        # print('lbl', lbl)

        for d in dj:
            d['cluster'] = lbl[d['subtomogram']]

        # calculate cluster averages
        new_avgs = set()
        for c in set([lbl[_] for _ in lbl]):
            # print('c', c)
            if c in avgs:
                continue
            avg_t = vol_avg(dj=[_ for _ in dj if _['cluster'] == c], op=op['average'], img_db=img_db)
            if avg_t is None:
                continue
            avgs[c] = avg_t
            avgs[c]['pass_i'] = pass_i
            avgs[c]['id'] = c
            new_avgs.add(c)
        if len(new_avgs) > 0:
            AIF.pickle_dump(avgs, op['average']['checkpoint'])

        # print('avgs')
        # for key in avgs:
        #     print('\n', pass_i, key)
        #     for key2 in avgs[key]:
        #         print(key2)
        #     print(avgs[key]['pass_i'], avgs[key]['id'])

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])

def encoder_simple_conv_test(d, pose, img_org_file, out_dir, clus_num):
    if pose:
        assert img_org_file is not None
        tom0 = auto.read_mrc_numpy_vol(img_org_file)
        tom = AFG.smooth(tom0, 2.0)
        x_keys = [_ for _ in d['vs'] if d['vs'][_]['v'] is not None]
        x_train_no_pose = [N.expand_dims(d['vs'][_]['v'], -1) for _ in x_keys]
        x_train_no_pose = N.array(x_train_no_pose)
        x_center = [d['vs'][_]['center'] for _ in x_keys]
        x_train = []
        default_val = tom.mean()
        x_train_no_pose -= x_train_no_pose.max()
        x_train_no_pose = N.abs(x_train_no_pose)
        print('pose normalizing')
        for i in range(len(x_train_no_pose)):
            center = x_center[i]
            v = x_train_no_pose[i][:, :, :, 0]
            c = auto.center_mass(v)
            # calculate principal directions
            rm = auto.pca(v=v, c=c)['v']
            mid_co = (N.array(v.shape) - 1) / 2.0
            loc_r__pn = rm.T.dot(mid_co - c)
            # pose normalize so that the major axis is along the x-axis
            vr = auto.rotate_retrieve(v, tom=tom, rm=rm, center=center, loc_r=loc_r__pn, default_val=default_val)
            x_train.append(vr)
        x_train = N.array(x_train)
        x_train = N.expand_dims(x_train, axis=4)
        print('pose normalization finished')
    else:
        x_keys = [_ for _ in d['vs'] if d['vs'][_]['v'] is not None]
        x_train = [N.expand_dims(d['vs'][_]['v'], -1) for _ in x_keys]
        x_train = N.array(x_train)

    if False:
        # warning: if you normalize here, you also need to normalize when decoding.
        # So it is better not to normalize here; use batch normalization in the network instead.
        if True:
            x_train -= x_train.mean()
            x_train /= x_train.std()
        else:
            x_train -= x_train.min()
            x_train /= x_train.max()
            x_train -= 0.5
            x_train *= 2

    # print('x_train.shape', x_train.shape)

    model_dir = op_join(out_dir, 'model')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    model_autoencoder_checkpoint_file = op_join(model_dir, 'model-autoencoder--weights--best.h5')
    model_autoencoder_file = op_join(model_dir, 'model-autoencoder.h5')
    model_encoder_file = op_join(model_dir, 'model-encoder.h5')
    model_decoder_file = op_join(model_dir, 'model-decoder.h5')

    if not os.path.isfile(model_autoencoder_file):
        enc = encoder_simple_conv(img_shape=d['v_siz'])
        autoencoder = enc['autoencoder']
        autoencoder_p = autoencoder

        from keras.optimizers import Adam
        # choose a proper lr to control convergence speed and val_loss
        adam = Adam(lr=0.001, beta_1=0.9, decay=0.001 / 500)
        # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        autoencoder_p.compile(optimizer=adam, loss='mean_squared_error')

        if os.path.isfile(model_autoencoder_checkpoint_file):
            print('loading previous best weights', model_autoencoder_checkpoint_file)
            autoencoder_p.load_weights(model_autoencoder_checkpoint_file)

        from keras.callbacks import EarlyStopping, ModelCheckpoint
        earlyStopping = EarlyStopping(monitor='val_loss', patience=20, verbose=0, mode='auto')
        checkpoint = ModelCheckpoint(model_autoencoder_checkpoint_file, monitor='val_loss', verbose=1,
                                     save_best_only=True, mode='auto')
        # use a large batch size when batch normalization is used
        autoencoder_p.fit(x_train, x_train, nb_epoch=100, batch_size=128, shuffle=True,
                          validation_split=0.1, callbacks=[checkpoint, earlyStopping])
        # we use the best weights for subsequent analysis
        autoencoder_p.load_weights(model_autoencoder_checkpoint_file)

        enc['autoencoder'].save(model_autoencoder_file)
        enc['encoder'].save(model_encoder_file)
        enc['decoder'].save(model_decoder_file)
    else:
        import keras.models as KM
        enc = dict()
        enc['autoencoder'] = KM.load_model(model_autoencoder_file)
        enc['encoder'] = KM.load_model(model_encoder_file)
        enc['decoder'] = KM.load_model(model_decoder_file)

    x_enc = enc['encoder'].predict(x_train)

    # use kmeans to separate x_enc into a specific number of clusters,
    # then decode the cluster centers and patch the decoded centers back into the image
    # (mayavi can be used for visualization)
    import multiprocessing
    from sklearn.cluster import KMeans
    kmeans_n_init = multiprocessing.cpu_count()
    kmeans = KMeans(n_clusters=clus_num, n_jobs=-1, n_init=100).fit(x_enc)
    x_km_cent = N.array([_.reshape(x_enc[0].shape) for _ in kmeans.cluster_centers_])
    x_km_cent_pred = enc['decoder'].predict(x_km_cent)

    # save cluster info and cluster centers
    clus_center_dir = op_join(out_dir, 'clus-center')
    if not os.path.isdir(clus_center_dir):
        os.makedirs(clus_center_dir)

    kmeans_clus = defaultdict(list)
    for i, l in enumerate(kmeans.labels_):
        kmeans_clus[l].append(x_keys[i])
    AIF.pickle_dump(kmeans_clus, op_join(clus_center_dir, 'kmeans.pickle'))

    ccents = {}
    for i in range(len(x_km_cent_pred)):
        ccents[i] = x_km_cent_pred[i].reshape(d['v_siz'])
    AIF.pickle_dump(ccents, op_join(clus_center_dir, 'ccents.pickle'))
    AIF.pickle_dump(x_km_cent, op_join(clus_center_dir, 'ccents_d.pickle'))

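# Usage sketch for encoder_simple_conv_test(): the input d is the subvolume dictionary
# written by dump_subvol()/select() above, i.e. {'v_siz': ..., 'vs': {uuid: {'v', 'center', 'id'}}}.
# The paths and clus_num below are placeholders, not fixed values.
d = AIF.pickle_load('./tmp/picking/selected_demo_single_particle_subvolumes.pickle')
encoder_simple_conv_test(d, pose=False, img_org_file=None,
                         out_dir='./tmp/autoencoder-out', clus_num=4)
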
def single_average(subtom):
    print('subtom_type=', type(subtom))
    assert len(subtom) == 100
    print('subtom[0]_type=', type(subtom[0]))
    average = True

    test_dir = './tmp/cls-test/' + str(uuid.uuid4())  # test dir
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)
    os.makedirs(test_dir)

    dj_file = os.path.join(test_dir, 'data.pickle')
    img_db_file = os.path.join(test_dir, 'image.db')

    v_num = 100  # the number of subtomograms in each class
    v_dim_siz = 32
    wedge_angle = 30
    mask_id = str(uuid.uuid4())
    dj = []
    class_num = 1
    for model_id in range(class_num):
        for v_i in range(v_num):
            ang_t = [_ for _ in N.random.random(3) * (N.pi * 2)]
            # loc_t = TGA.random_translation(size=[v_dim_siz] * 3, proportion=0.2)
            loc_t = [0.0, 0.0, 0.0]
            v_id = str(uuid.uuid4())
            dj.append({'subtomogram': v_id, 'mask': mask_id, 'angle': ang_t, 'loc': loc_t, 'model_id': model_id})
    AIF.pickle_dump(dj, dj_file)

    sim_op = {
        'model': {'missing_wedge_angle': wedge_angle, 'titlt_angle_step': 1, 'SNR': 1000,
                  'band_pass_filter': False, 'use_proj_mask': False},
        'ctf': {'pix_size': 1.0, 'Dz': -5.0, 'voltage': 300, 'Cs': 2.0, 'sigma': 0.4}
    }

    img_db = TIDL.LSM(img_db_file)
    index = 0
    for d in dj:
        img_db[d['subtomogram']] = subtom[index].astype(N.float)
        # print(img_db[d['subtomogram']].shape)
        index = index + 1

    import aitom.image.vol.wedge.util as TIVWU
    img_db[mask_id] = TIVWU.wedge_mask(size=[v_dim_siz] * 3, ang1=wedge_angle)
    print('file generation complete')

    out_dir = os.path.join(test_dir, 'out')
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    from aitom.classify.align.simple_iterative.classify import randomize_orientation
    from aitom.classify.align.simple_iterative.classify import export_avgs

    if average:
        import aitom.average.align.simple_iterative.average as avg
        op = {}
        op['option'] = {'pass_num': 20}  # the number of iterations
        op['data_checkpoint'] = os.path.join(out_dir, 'djs.pickle')
        op['average'] = {}
        op['average']['mask_count_threshold'] = 2
        op['average']['checkpoint'] = os.path.join(out_dir, 'avgs.pickle')

        dj = AIF.pickle_load(os.path.join(test_dir, 'data.pickle'))
        img_db = TIDL.LSM(os.path.join(test_dir, 'image.db'), readonly=True)
        randomize_orientation(dj)
        avg.average(dj_init=dj, img_db=img_db, op=op)
        export_avgs(AIF.pickle_load(os.path.join(out_dir, 'avgs.pickle')), out_dir=os.path.join(out_dir, 'avgs-export'))
        print('averaging done')

    # visualization
    # test_dir = './tmp/cls-test/' + str(uuid.uuid4())  # test dir
    avgs = pickle_load(os.path.join(test_dir, 'out/avgs.pickle'))
    out_dir = os.path.join(test_dir, 'image')
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    for i in avgs.keys():
        v = avgs[i]['v']
        file_name = str(avgs[i]['pass_i']) + '_' + str(i) + '.png'
        save_png(cub_img(v)['im'], os.path.join(out_dir, file_name))
    print('images saved in', out_dir)

def classify(dj_init=None, img_db=None, djs_file=None, avgs_file=None, pcas_file=None, op=None):
    djs = load_dict(op['data_checkpoint'])
    pcas = load_dict(op['dim_reduction']['pca']['checkpoint'])
    clus = load_dict(op['clustering']['checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])
    dj = djs[-1]

    for pass_i in range(op['option']['pass_num']):
        if pass_i in djs:
            dj = djs[pass_i]
            continue
        dj = copy.deepcopy(dj)  # make a copy of the previous pass, for an update

        if pass_i not in pcas:
            red = covariance_filtered_pca(dj=dj, img_db=img_db, templates=avgs, pca_op=op['dim_reduction']['pca'])
            # print(type(red))
            pcas[pass_i] = red
            AIF.pickle_dump(pcas, op['dim_reduction']['pca']['checkpoint'])
        else:
            red = copy.deepcopy(pcas[pass_i])

        if pass_i not in clus:
            lbl = kmeans_clustering(x=red, k=op['clustering']['kmeans_k'])
            clus[pass_i] = lbl
            AIF.pickle_dump(clus, op['clustering']['checkpoint'])
        else:
            lbl = clus[pass_i]
        # print('lbl', lbl)

        for d in dj:
            d['cluster'] = lbl[d['subtomogram']]

        # calculate cluster averages
        new_avgs = set()
        for c in set([lbl[_] for _ in lbl]):
            # print('c', c)
            if c in avgs:
                continue
            avg_t = vol_avg(dj=[_ for _ in dj if _['cluster'] == c], op=op['average'], img_db=img_db)
            if avg_t is None:
                continue
            avgs[c] = avg_t
            avgs[c]['pass_i'] = pass_i
            avgs[c]['id'] = c
            new_avgs.add(c)
        if len(new_avgs) > 0:
            AIF.pickle_dump(avgs, op['average']['checkpoint'])

        '''
        print('avgs')
        for key in avgs:
            print('\n', pass_i, key)
            for key2 in avgs[key]:
                print(key2)
            print(avgs[key]['pass_i'], avgs[key]['id'])
        '''

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])

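# Call sketch for classify(): the op keys mirror what the function reads; the concrete
# values (pass_num, kmeans_k) and paths are illustrative assumptions, and dj/img_db are
# prepared the same way as in single_average() above.
out_dir = './tmp/classify-out'
op = {
    'option': {'pass_num': 10},
    'data_checkpoint': os.path.join(out_dir, 'djs.pickle'),
    'dim_reduction': {'pca': {'checkpoint': os.path.join(out_dir, 'pcas.pickle')}},  # plus any PCA options covariance_filtered_pca expects
    'clustering': {'kmeans_k': 4, 'checkpoint': os.path.join(out_dir, 'clus.pickle')},
    'average': {'mask_count_threshold': 2, 'checkpoint': os.path.join(out_dir, 'avgs.pickle')},
}
dj = AIF.pickle_load('./tmp/cls-test/data.pickle')            # placeholder path
img_db = TIDL.LSM('./tmp/cls-test/image.db', readonly=True)   # placeholder path
classify(dj_init=dj, img_db=img_db, op=op)
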
def average(dj_init=None, img_db=None, djs_file=None, avgs_file=None, pcas_file=None, op=None):
    """
    parameters:
        dj_init: a list of dicts, where each element looks like:
            {'subtomogram': v_id, 'mask': mask_id, 'angle': ang_t, 'loc': loc_t, 'model_id': model_id}
        img_db: a dict to find subtomogram data by its uuid (img_db[uuid] is a 3D np array);
            it contains only one class

    result (pickle file): the average result, with the same shape as the original subtomograms
    """
    djs = load_dict(op['data_checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])
    dj = djs[-1]

    for pass_i in range(op['option']['pass_num']):
        print('pass_i', pass_i)
        if pass_i in djs:
            dj = djs[pass_i]
            continue
        # make a copy of the previous pass, for an update
        dj = copy.deepcopy(dj)

        c = str(uuid.uuid4())
        avg_t = vol_avg(dj=dj, op=op['average'], img_db=img_db)
        avgs[c] = avg_t
        avgs[c]['pass_i'] = pass_i
        avgs[c]['id'] = c
        AIF.pickle_dump(avgs, op['average']['checkpoint'])
        print('averaging done')

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']
        print('re-align done')

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])

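# Call sketch for average(): the op layout is condensed from the setup in single_average()
# above; the paths are placeholders, and dj/img_db are loaded the same way as there.
out_dir = './tmp/avg-out'
op = {
    'option': {'pass_num': 20},  # the number of iterations
    'data_checkpoint': os.path.join(out_dir, 'djs.pickle'),
    'average': {'mask_count_threshold': 2, 'checkpoint': os.path.join(out_dir, 'avgs.pickle')},
}
dj = AIF.pickle_load('./tmp/cls-test/data.pickle')            # placeholder path
img_db = TIDL.LSM('./tmp/cls-test/image.db', readonly=True)   # placeholder path
average(dj_init=dj, img_db=img_db, op=op)
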