def run(self, args): #read inputs runh = run_handler() from prime.postrefine.mod_input import process_input iparams, txt_out_input = process_input(argv=args, flag_check_exist=False) iparams.flag_volume_correction = False if iparams.partiality_model == "Lognormal": iparams.voigt_nu = 0.008 #use voigt_nu as lognpdf zero parameter #read all result pickles try: DIR = iparams.run_no+'/pickles/' pickle_results = [pickle.load(open(DIR+fname, "rb")) for fname in os.listdir(DIR)] n_results = len(pickle_results) except Exception: print "Error reading input pickles." print "*VERSION UPGRADE NOTE* use prime.run instead of prime.postrefine to run all processes together." exit() #get reference file - look for n.mtz with n as maximum number. hklrefin = None if iparams.hklrefin is None: DIR = iparams.run_no+'/mtz/' file_no_list = [int(fname.split('.')[0]) for fname in os.listdir(DIR)] if len(file_no_list) > 0: hklrefin = DIR + str(max(file_no_list)) + '.mtz' else: hklrefin = iparams.hklrefin if hklrefin is None: print "No reference set found. Exit program" print "Reference set:", hklrefin mxh = mx_handler() flag_hklrefin_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(hklrefin) #post-refinement avg_mode = 'weighted' #run command for post-refinement frames = [(i, pickle_results[i], iparams, miller_array_ref, avg_mode) for i in range(n_results)] inp_pickle = {'iparams':iparams, 'frames':frames} pickle.dump(inp_pickle, open(iparams.run_no+'/inputs/0.inp',"wb")) call(["prime._postrefine_frame", iparams.run_no+'/inputs/0.inp']) runh.check_done(iparams, n_results) print "Post-refinement completed. Run prime.merge for the merged reflection file."
def get_results(self, finished_objects=None):
    """Fold finished image objects into the run-wide info container.

    Each finished object is removed from the ``unprocessed`` and
    ``not_processed`` lists and filed under its failure category, or under
    ``integrated`` when it did not fail. For the integrated objects the
    per-image statistics, unit cell / space group, merged observations and
    Wilson B-factor are collected, and summary statistics are computed for
    every entry of ``self.info.stats``.

    :param finished_objects: objects to process; when empty or ``None`` they
        are fetched with ``self.info.get_finished_objects()``.
    :return: ``True`` when at least one object was integrated, ``False``
        otherwise (including when there is nothing to process).
    """
    if not finished_objects:
        finished_objects = self.info.get_finished_objects()
        if not finished_objects:
            return False

    final_objects = []
    self.info.unplotted_stats = {}
    for key in self.info.stats:
        self.info.unplotted_stats[key] = dict(lst=[])

    for obj in finished_objects:
        item = [obj.input_index, obj.img_path, obj.img_index]
        if len(self.info.unprocessed) > 0 and item in self.info.unprocessed:
            self.info.unprocessed.remove(item)
        if (len(self.info.categories['not_processed'][0]) > 0 and
                item in self.info.categories['not_processed'][0]):
            self.info.categories['not_processed'][0].remove(item)

        if obj.fail:
            key = obj.fail.replace(' ', '_')
            if key in self.info.categories:
                self.info.categories[key][0].append(item)
        else:
            self.info.categories['integrated'][0].append(obj.final['final'])
            self.info.final_objects.append(obj.obj_file)
            final_objects.append(obj)

        if not obj.fail or 'triage' not in obj.fail:
            self.info.categories['have_diffraction'][0].append(obj.img_path)

    # Calculate processing stats from final objects
    if not final_objects:
        return False

    self.info.pixel_size = final_objects[0].final['pixel_size']

    # Get observations from file
    try:
        all_obs = ep.load(self.info.idx_file)
    except Exception:
        # Best effort: start a fresh collection if nothing usable was stored.
        all_obs = None

    # Collect image processing stats
    for obj in final_objects:
        for key in self.info.stats:
            if key in obj.final:
                stat_tuple = (obj.input_index, obj.img_path, obj.img_index,
                              obj.final[key])
                self.info.stats[key]['lst'].append(stat_tuple)

                if key not in self.info.unplotted_stats:
                    self.info.unplotted_stats[key] = dict(lst=[])
                self.info.unplotted_stats[key]['lst'].append(stat_tuple)

        # Unit cells and space groups (i.e. cluster iterable)
        self.info.cluster_iterable.append([
            float(obj.final['a']),
            float(obj.final['b']),
            float(obj.final['c']),
            float(obj.final['alpha']),
            float(obj.final['beta']),
            float(obj.final['gamma']),
            str(obj.final['sg'])
        ])

        # Get observations from this image
        obs = None
        if 'observations' in obj.final:
            obs = obj.final['observations'].as_non_anomalous_array()
        else:
            pickle_path = obj.final['final']
            if os.path.isfile(pickle_path):
                try:
                    # Named so that it does not shadow the ``pickle`` module.
                    image_pickle = ep.load(pickle_path)
                    obs = image_pickle['observations'][0].as_non_anomalous_array()
                except Exception as e:
                    print('IMAGE_PICKLE_ERROR for {}: {}'.format(
                        pickle_path, e))

        with util.Capturing():
            if obs:
                # Append observations to combined miller array
                obs = obs.expand_to_p1()
                if all_obs:
                    all_obs = all_obs.concatenate(
                        obs, assert_is_similar_symmetry=False)
                else:
                    all_obs = obs

                # Get B-factor from this image
                try:
                    mxh = mx_handler()
                    asu_contents = mxh.get_asu_contents(500)
                    observations_as_f = obs.as_amplitude_array()
                    observations_as_f.setup_binner(auto_binning=True)
                    wp = statistics.wilson_plot(observations_as_f,
                                                asu_contents,
                                                e_statistics=True)
                    b_factor = wp.wilson_b
                except RuntimeError as e:
                    b_factor = 0
                    print('B_FACTOR_ERROR: ', e)
                self.info.b_factors.append(b_factor)

    # Save collected observations to file
    if all_obs:
        ep.dump(self.info.idx_file, all_obs)

    # Calculate dataset stats
    for k in self.info.stats:
        entries = self.info.stats[k]['lst']
        if not entries:
            # Nothing recorded for this statistic; leave it untouched.
            continue
        # Each entry is (input_index, img_path, img_index, value): the
        # statistic itself is the fourth field, not the image index.
        stat_list = list(zip(*entries))[3]
        stats = dict(lst=entries,
                     median=np.median(stat_list),
                     mean=np.mean(stat_list),
                     std=np.std(stat_list),
                     max=np.max(stat_list),
                     min=np.min(stat_list),
                     cons=Counter(stat_list).most_common(1)[0][0])
        self.info.stats[k].update(stats)
    return True
def run(self, args): #read inputs from prime.postrefine.mod_input import process_input, read_pickles iparams, txt_out_input = process_input(args) print txt_out_input f = open(iparams.run_no+'/log.txt', 'w') f.write(txt_out_input) f.close() #if solution pickle is given, return the file name if iparams.indexing_ambiguity.index_basis_in is not None: if iparams.indexing_ambiguity.index_basis_in.endswith('.pickle'): return iparams.indexing_ambiguity.index_basis_in, iparams #read all integration pickles frame_files = read_pickles(iparams.data) n_frames = len(frame_files) if n_frames == 0: print "No integration pickle found. Exit program." return None, iparams #exit if no problem if self.should_terminate(iparams, frame_files[0]): print "No indexing ambiguity problem. Set index_ambiguity.mode = Forced and assigned_basis = list of basis formats to solve pseudo-twinning problem." return None, iparams #continue with (Auto - alt>1, find solution), (Auto - alt>1, mtz) #(Forced - assigned_basis, mtz), (Forced - assigned_basis, find solution) #************************************************* #if mtz file is given, use it to solve the problem sol_fname = iparams.run_no+'/index_ambiguity/solution_pickle.pickle' if iparams.indexing_ambiguity.index_basis_in is not None: if iparams.indexing_ambiguity.index_basis_in.endswith('.mtz'): mxh = mx_handler() flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(iparams.indexing_ambiguity.index_basis_in) if flag_ref_found == False: print "Reference mtz file not found. Set indexing_ambiguity.index_basis_in = None to enable auto generate the solutions." 
return None, iparams else: frames = [(i, frame_files[i], iparams, miller_array_ref) for i in range(n_frames)] cc_results = pool_map( iterable=frames, func=solve_with_mtz_mproc, processes=iparams.n_processors) sol_pickle = {} for result in cc_results: pickle_filename, index_basis = result sol_pickle[pickle_filename] = index_basis pickle.dump(sol_pickle, open(sol_fname,"wb")) return sol_fname, iparams #************************************************* #solve with Brehm & Diederichs - sample size n_sample_frames then bootstrap the rest frames = [(i, frame_files[i], iparams) for i in random.sample(range(n_frames), iparams.indexing_ambiguity.n_sample_frames)] #get observations list print "Reading observations" alt_dict_results = pool_map( iterable=frames, func=get_obs_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] obs_list = [] for result in alt_dict_results: alt_dict, pickle_filename = result if alt_dict is not None: for key in alt_dict.keys(): frame_dup_files.append(pickle_filename) frame_keys.append(key) obs_list.append(alt_dict[key]) frames = [(i, frame_dup_files[i], frame_keys[i], obs_list[i], obs_list) for i in range(len(frame_dup_files))] #calculate r print "Calculating R" calc_r_results = pool_map( iterable=frames, func=calculate_r_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] r_matrix = [] for result in calc_r_results: if result is not None: pickle_filename, index_basis, r_set = result frame_dup_files.append(pickle_filename) frame_keys.append(index_basis) if len(r_matrix) == 0: r_matrix = r_set else: r_matrix = np.append(r_matrix, r_set, axis=0) #choose groups with best CC print "Selecting frames with best R" i_mean_r = np.argsort(np.mean(r_matrix, axis=1))[::-1] r_matrix_sorted = r_matrix[i_mean_r] frame_dup_files_sorted = np.array(frame_dup_files)[i_mean_r] frame_keys_sorted = np.array(frame_keys)[i_mean_r] frame_dup_files_sel = [] for frame_file, frame_key, r_set in zip(frame_dup_files_sorted, 
frame_keys_sorted, r_matrix_sorted): if frame_file not in frame_dup_files_sel: frame_dup_files_sel.append(frame_file) print frame_file, frame_key, np.mean(r_set) if len(frame_dup_files_sel) >= iparams.indexing_ambiguity.n_selected_frames: print 'Found all %6.0f good frames'%(len(frame_dup_files_sel)) break ## #rebuild observations and r_matrix frames = [(i, frame_dup_files_sel[i], iparams) for i in range(len(frame_dup_files_sel))] #get observations list print "Re-reading observations" alt_dict_results = pool_map( iterable=frames, func=get_obs_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] obs_list = [] for result in alt_dict_results: alt_dict, pickle_filename = result if alt_dict is not None: for key in alt_dict.keys(): frame_dup_files.append(pickle_filename) frame_keys.append(key) obs_list.append(alt_dict[key]) frames = [(i, frame_dup_files[i], frame_keys[i], obs_list[i], obs_list) for i in range(len(frame_dup_files))] #calculate r print "Re-calculating R" calc_r_results = pool_map( iterable=frames, func=calculate_r_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] r_matrix = [] for result in calc_r_results: if result is not None: pickle_filename, index_basis, r_set = result frame_dup_files.append(pickle_filename) frame_keys.append(index_basis) if len(r_matrix) == 0: r_matrix = r_set else: r_matrix = np.append(r_matrix, r_set, axis=0) print "Minimizing frame distance" idah = indamb_handler() x_set = idah.optimize(r_matrix, flag_plot=iparams.flag_plot) x_pickle = {'frame_dup_files':frame_dup_files, 'frame_keys':frame_keys, \ 'r_matrix':r_matrix, 'x_set':x_set} pickle.dump(x_pickle, open(iparams.run_no+'/index_ambiguity/x.out',"wb")) print "Clustering results" kmh = kmeans_handler() k = 2**(len(idah.get_observations(frame_dup_files[0], iparams))-1) centroids, labels = kmh.run(x_set, k, flag_plot=iparams.flag_plot) print "Get solution pickle" sample_fname = iparams.run_no+'/index_ambiguity/sample.lst' sol_pickle 
= idah.assign_basis(frame_dup_files, frame_keys, labels, k, sample_fname) pickle.dump(sol_pickle, open(sol_fname,"wb")) #if more frames found, merge the sample frames to get a reference set #that can be used for breaking the ambiguity. if n_frames > iparams.indexing_ambiguity.n_selected_frames: print "Breaking the indexing ambiguity for the remaining images." old_iparams_data = iparams.data[:] iparams.data = [sample_fname] iparams.indexing_ambiguity.index_basis_in = sol_fname grh = genref_handler() grh.run_by_params(iparams) mh = merge_handler() mh.run_by_params(iparams) DIR = iparams.run_no+'/mtz/' file_no_list = [int(fname.split('.')[0]) for fname in os.listdir(DIR)] if len(file_no_list) > 0: hklref_indamb = DIR + str(max(file_no_list)) + '.mtz' print "Bootstrap reference reflection set:", hklref_indamb #setup a list of remaining frames frame_files_remain = [] for frame in frame_files: if frame not in sol_pickle: frame_files_remain.append(frame) #determine index basis mxh = mx_handler() flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(hklref_indamb) frames = [(i, frame_files_remain[i], iparams, miller_array_ref) for i in range(len(frame_files_remain))] cc_results = pool_map( iterable=frames, func=solve_with_mtz_mproc, processes=iparams.n_processors) for result in cc_results: pickle_filename, index_basis = result sol_pickle[pickle_filename] = index_basis iparams.data = old_iparams_data[:] #write out solution pickle pickle.dump(sol_pickle, open(sol_fname,"wb")) #write out text output txt_out = "Solving indexing ambiguity complete. Solution file saved to "+sol_fname+"\n" f = open(iparams.run_no+'/log.txt', 'a') f.write(txt_out) f.close() return sol_fname, iparams
def run(self, args): #read inputs from prime.postrefine.mod_input import process_input, read_pickles iparams, txt_out_input = process_input(args) print txt_out_input f = open(iparams.run_no+'/log.txt', 'w') f.write(txt_out_input) f.close() #if solution pickle is given, return the file name if iparams.indexing_ambiguity.index_basis_in is not None: if iparams.indexing_ambiguity.index_basis_in.endswith('.pickle'): return iparams.indexing_ambiguity.index_basis_in, iparams #read all integration pickles frame_files = read_pickles(iparams.data) n_frames = len(frame_files) if n_frames == 0: print "No integration pickle found. Exit program." return None, iparams #exit if no problem if self.should_terminate(iparams, frame_files[0]): print "No indexing ambiguity problem. Set index_ambiguity.mode = Forced and assigned_basis = list of basis formats to solve pseudo-twinning problem." return None, iparams #continue with (Auto - alt>1, find solution), (Auto - alt>1, mtz) #(Forced - assigned_basis, mtz), (Forced - assigned_basis, find solution) #************************************************* #if mtz file is given, use it to solve the problem sol_fname = iparams.run_no+'/index_ambiguity/solution_pickle.pickle' if iparams.indexing_ambiguity.index_basis_in is not None: if iparams.indexing_ambiguity.index_basis_in.endswith('.mtz'): mxh = mx_handler() flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(iparams.indexing_ambiguity.index_basis_in) if flag_ref_found == False: print "Reference mtz file not found. Set indexing_ambiguity.index_basis_in = None to enable auto generate the solutions." 
return None, iparams else: frames = [(i, frame_files[i], iparams, miller_array_ref) for i in range(n_frames)] cc_results = pool_map( iterable=frames, func=solve_with_mtz_mproc, processes=iparams.n_processors) sol_pickle = {} for result in cc_results: pickle_filename, index_basis = result sol_pickle[pickle_filename] = index_basis pickle.dump(sol_pickle, open(sol_fname,"wb")) return sol_fname, iparams #************************************************* #solve with Brehm & Diederichs - sample size n_sample_frames then bootstrap the rest frames = [(i, frame_files[i], iparams) for i in random.sample(range(n_frames), iparams.indexing_ambiguity.n_sample_frames)] #get observations list print "Reading observations" alt_dict_results = pool_map( iterable=frames, func=get_obs_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] obs_list = [] for result in alt_dict_results: alt_dict, pickle_filename = result if alt_dict is not None: for key in alt_dict.keys(): frame_dup_files.append(pickle_filename) frame_keys.append(key) obs_list.append(alt_dict[key]) frames = [(i, frame_dup_files[i], frame_keys[i], obs_list[i], obs_list) for i in range(len(frame_dup_files))] #calculate r print "Calculating R" calc_r_results = pool_map( iterable=frames, func=calculate_r_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] r_matrix = [] for result in calc_r_results: if result is not None: pickle_filename, index_basis, r_set = result frame_dup_files.append(pickle_filename) frame_keys.append(index_basis) if len(r_matrix) == 0: r_matrix = r_set else: r_matrix = np.append(r_matrix, r_set, axis=0) #choose groups with best CC print "Selecting frames with best R" i_mean_r = np.argsort(np.mean(r_matrix, axis=1))[::-1] r_matrix_sorted = r_matrix[i_mean_r] frame_dup_files_sorted = np.array(frame_dup_files)[i_mean_r] frame_keys_sorted = np.array(frame_keys)[i_mean_r] frame_dup_files_sel = [] for frame_file, frame_key, r_set in zip(frame_dup_files_sorted, 
frame_keys_sorted, r_matrix_sorted): if frame_file not in frame_dup_files_sel: frame_dup_files_sel.append(frame_file) print frame_file, frame_key, np.mean(r_set) if len(frame_dup_files_sel) >= iparams.indexing_ambiguity.n_selected_frames: print 'Found all %6.0f good frames'%(len(frame_dup_files_sel)) break ## #rebuild observations and r_matrix frames = [(i, frame_dup_files_sel[i], iparams) for i in range(len(frame_dup_files_sel))] #get observations list print "Re-reading observations" alt_dict_results = pool_map( iterable=frames, func=get_obs_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] obs_list = [] for result in alt_dict_results: alt_dict, pickle_filename = result if alt_dict is not None: for key in alt_dict.keys(): frame_dup_files.append(pickle_filename) frame_keys.append(key) obs_list.append(alt_dict[key]) frames = [(i, frame_dup_files[i], frame_keys[i], obs_list[i], obs_list) for i in range(len(frame_dup_files))] #calculate r print "Re-calculating R" calc_r_results = pool_map( iterable=frames, func=calculate_r_mproc, processes=iparams.n_processors) frame_dup_files = [] frame_keys = [] r_matrix = [] for result in calc_r_results: if result is not None: pickle_filename, index_basis, r_set = result frame_dup_files.append(pickle_filename) frame_keys.append(index_basis) if len(r_matrix) == 0: r_matrix = r_set else: r_matrix = np.append(r_matrix, r_set, axis=0) print "Minimizing frame distance" idah = indamb_handler() x_set = idah.optimize(r_matrix, flag_plot=iparams.flag_plot) x_pickle = {'frame_dup_files':frame_dup_files, 'frame_keys':frame_keys, \ 'r_matrix':r_matrix, 'x_set':x_set} pickle.dump(x_pickle, open(iparams.run_no+'/index_ambiguity/x.out',"wb")) print "Clustering results" kmh = kmeans_handler() k = 2**(len(idah.get_observations(frame_dup_files[0], iparams))-1) centroids, labels = kmh.run(x_set, k, flag_plot=iparams.flag_plot) print "Get solution pickle" sample_fname = iparams.run_no+'/index_ambiguity/sample.lst' sol_pickle 
= idah.assign_basis(frame_dup_files, frame_keys, labels, k, sample_fname) pickle.dump(sol_pickle, open(sol_fname,"wb")) #if more frames found, merge the sample frames to get a reference set #that can be used for breaking the ambiguity. if n_frames > iparams.indexing_ambiguity.n_selected_frames: print "Breaking the indexing ambiguity for the remaining images." old_iparams_data = iparams.data[:] iparams.data = [sample_fname] iparams.indexing_ambiguity.index_basis_in = sol_fname grh = genref_handler() grh.run_by_params(iparams) mh = merge_handler() mh.run_by_params(iparams) DIR = iparams.run_no+'/mtz/' file_no_list = [int(fname.split('.')[0]) for fname in os.listdir(DIR)] if len(file_no_list) > 0: hklref_indamb = DIR + str(max(file_no_list)) + '.mtz' print "Bootstrap reference reflection set:", hklref_indamb #setup a list of remaining frames frame_files_remain = [] for frame in frame_files: if frame not in sol_pickle: frame_files_remain.append(frame) #determine index basis mxh = mx_handler() flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(hklref_indamb) frames = [(i, frame_files_remain[i], iparams, miller_array_ref) for i in range(len(frame_files_remain))] cc_results = pool_map( iterable=frames, func=solve_with_mtz_mproc, processes=iparams.n_processors) for result in cc_results: pickle_filename, index_basis = result sol_pickle[pickle_filename] = index_basis iparams.data = old_iparams_data[:] #write out solution pickle pickle.dump(sol_pickle, open(sol_fname,"wb")) #write out text output txt_out = "Solving indexing ambiguity complete. Solution file saved to "+sol_fname+"\n" f = open(iparams.run_no+'/log.txt', 'a') f.write(txt_out) f.close() return sol_fname, iparams
def run_by_params(self, iparams): iparams.flag_volume_correction = False if iparams.partiality_model == "Lognormal": iparams.voigt_nu = 0.008 #use voigt_nu as lognpdf zero parameter #read all result pickles try: DIR = iparams.run_no + '/pickles/' pickle_results = [ pickle.load(open(DIR + fname, "rb")) for fname in os.listdir(DIR) ] file_no_results = [ int(fname.split('.')[0]) for fname in os.listdir(DIR) ] n_results = len(pickle_results) except Exception: print "Error reading input pickles." print "*VERSION UPGRADE NOTE* use prime.run instead of prime.postrefine to run all processes together." exit() #get reference file - look for n.mtz with n as maximum number. hklrefin = None if iparams.hklrefin is None: DIR = iparams.run_no + '/mtz/' file_no_list = [ int(fname.split('.')[0]) for fname in os.listdir(DIR) ] if len(file_no_list) > 0: hklrefin = DIR + str(max(file_no_list)) + '.mtz' else: hklrefin = iparams.hklrefin if hklrefin is None: print "No reference set found. Exit program" print "Reference set:", hklrefin, " No. 
of images:", n_results mxh = mx_handler() flag_hklrefin_found, miller_array_ref = mxh.get_miller_array_from_reflection_file( hklrefin) #post-refinement avg_mode = 'weighted' #run command for post-refinement if iparams.queue.mode is None: frames = [(file_no_results[i], pickle_results[i], iparams, miller_array_ref, avg_mode) for i in range(n_results)] inp_pickle = {'iparams': iparams, 'frames': frames} pickle.dump(inp_pickle, open(iparams.run_no + '/inputs/0.inp', "wb")) call(["prime._postrefine_frame", iparams.run_no + '/inputs/0.inp']) else: #run on n_nodes n_imgs_per_node = int(round(n_results / iparams.queue.n_nodes)) for i_node in range(iparams.queue.n_nodes): start_frame = i_node * n_imgs_per_node if i_node < iparams.queue.n_nodes - 1: end_frame = start_frame + n_imgs_per_node else: end_frame = n_results frames = [(i, pickle_results[i], iparams, miller_array_ref, avg_mode) for i in range(start_frame, end_frame)] inp_pickle = {'iparams': iparams, 'frames': frames} pickle.dump( inp_pickle, open(iparams.run_no + '/inputs/' + str(i_node) + '.inp', "wb")) call([ "bsub", "-q", iparams.queue.qname, "-o", iparams.run_no + "/qout/qout_pr.txt", "prime._postrefine_frame", iparams.run_no + "/inputs/" + str(i_node) + ".inp" ]) runh = run_handler() runh.check_done(iparams, n_results) print "Post-refinement completed. Run prime.merge for the merged reflection file."
def run(argv): #capture starting time time_global_start = datetime.now() import logging logging.captureWarnings(True) formatter = logging.Formatter('%(asctime)s\t%(levelname)s\t%(message)s') console_handler = logging.StreamHandler() console_handler.setLevel(logging.ERROR) console_handler.setFormatter(formatter) logging.getLogger().addHandler(console_handler) logging.getLogger('py.warnings').addHandler(console_handler) logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', level=logging.DEBUG) #0.1 determine indexing ambiguity and setup iparams txt_indexing_ambiguity = "Determine if there is an indexing ambiguity on the dataset" print txt_indexing_ambiguity idah = indexing_ambiguity_handler() sol_fname, iparams = idah.run(argv) if sol_fname is None: print "No ambiguity." txt_indexing_ambiguity += "\nNo ambiguity." else: print "Ambiguity is solved. Solution file was saved to :" + str( sol_fname) txt_indexing_ambiguity += "Ambiguity is solved. Solution file was saved to :" + str( sol_fname) iparams.indexing_ambiguity.index_basis_in = sol_fname #0.2 setup parameters iparams.flag_volume_correction = False if iparams.partiality_model == "Lognormal": iparams.voigt_nu = 0.008 #use voigt_nu as lognpdf zero parameter #0.3 read frames frame_files = read_pickles(iparams.data) frames = range(len(frame_files)) #1. prepare reference miller array txt_merge_mean = 'Generating a reference set (will not be used if hklrefin is set)' print txt_merge_mean #Always generate the mean-intensity scaled set. scaled_pres_set = scale_frames(frames, frame_files, iparams) mdh, _txt_merge_mean = merge_frames(scaled_pres_set, iparams) miller_array_ref = mdh.miller_array_merge txt_merge_mean += '\n' + _txt_merge_mean if not iparams.n_postref_cycle: with open(iparams.run_no + '/log.txt', 'a') as f: f.write(txt_indexing_ambiguity + txt_merge_mean) raise Usage( "No. of post-refinement cycle was set to 0. Exit without post-refinement." 
) if iparams.hklrefin is not None: mxh = mx_handler() _, miller_array_ref = mxh.get_miller_array_from_reflection_file( iparams.hklrefin) if miller_array_ref is None: raise Usage( "Problem with the assigned reference set. Try setting hklrefin=None and rerun the program." ) #2. Post-refinement txt_merge_postref = '' postref_pres_set = [None] * len(frames) avg_mode = 'weighted' for i_iter in xrange(iparams.n_postref_cycle): if i_iter == (iparams.n_postref_cycle - 1): avg_mode = 'final' postref_good_pres_set, postref_pres_set, _txt_merge_postref = postrefine_frames( i_iter, frames, frame_files, iparams, postref_pres_set, miller_array_ref, avg_mode) if postref_good_pres_set: mdh, _txt_merge_postref = merge_frames( postref_good_pres_set, iparams, avg_mode=avg_mode, mtz_out_prefix='postref_cycle_' + str(i_iter + 1)) miller_array_ref = mdh.miller_array_merge txt_merge_postref += _txt_merge_postref else: raise Usage( "Problem with post-refinement. No images refined. Please check your input file." ) #3. collect caculating time time_global_end = datetime.now() time_global_spent = time_global_end - time_global_start txt_out_time_spent = 'Total calculation time: '+'{0:.2f}'.format(time_global_spent.seconds)+ \ ' seconds\nFinished: '+time_global_end.strftime("%A %d. %B %Y %H:%M:%S")+'\n' print txt_out_time_spent txt_out = txt_indexing_ambiguity + txt_merge_mean + txt_merge_postref + txt_out_time_spent with open(os.path.join(iparams.run_no, 'log.txt'), 'a') as f: f.write(txt_out) with open(os.path.join(iparams.run_no, '.done'), 'w') as f: f.write('Done') return mdh
if (__name__ == "__main__"): uc_tol = 3 ry, rz, re, rotx, roty = (0, 0, 0.008, 0, 0) flag_beam_divergence = False lambda_template = flex.double(range(-50, 50, 1)) / 1000 #0 .read input parameters and frames (pickle files) data, hklrefin, pixel_size_mm, target_unit_cell, \ d_min, d_max = read_input(args = sys.argv[1:]) frame_files = read_pickles(data) for pickle_filename in frame_files: observations_pickle = read_frame(pickle_filename) pickle_filename_arr = pickle_filename.split('/') pickle_filename_only = pickle_filename_arr[len(pickle_filename_arr) - 1] mxh = mx_handler() flag_hklisoin_found, miller_array_iso = mxh.get_miller_array_from_reflection_file( hklrefin) observations = observations_pickle["observations"][0] #check if the uc is good flag_good_unit_cell = good_unit_cell( observations.unit_cell().parameters(), None, uc_tol, target_unit_cell=target_unit_cell) #update lambda_set lambda_set = lambda_template + observations_pickle["wavelength"] crystal_init_orientation = observations_pickle["current_orientation"][ 0] detector_distance_mm = observations_pickle['distance'] mm_predictions = pixel_size_mm * (
def run(self, args): #read inputs from prime.postrefine.mod_input import process_input, read_pickles iparams, txt_out_input = process_input(args) print txt_out_input with open(os.path.join(iparams.run_no, self.module_name, 'log.txt'), 'w') as f: f.write(txt_out_input) #read all integration pickles frame_files = read_pickles(iparams.data) n_frames = len(frame_files) if n_frames == 0: print "No integration pickle found. Exit program." return None, iparams #start if iparams.isoform_cluster.isorefin: #get collection of iso. ref. reflection set. mxh = mx_handler() miller_array_ref_set = [] for isorefin in iparams.isoform_cluster.isorefin: flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file( isorefin) if flag_ref_found: miller_array_ref_set.append(miller_array_ref) #get observation list frame_files_sel, obs_list = self.get_observation_set( iparams, frame_files, n_frames) if miller_array_ref_set: frames = [(i, frame_files_sel[i], obs_list[i], iparams, miller_array_ref_set) for i in range(len(obs_list))] cc_results = pool_map(iterable=frames, func=solve_with_mtz_mproc, processes=iparams.n_processors) sol_pickle = {} for result in cc_results: pickle_filename, cluster_id = result sol_pickle[pickle_filename] = cluster_id write_out_solutions(iparams, sol_pickle) txt_out = "Cluster images with given " + str( len(miller_array_ref_set) ) + " mtz files completed. 
Use cluster_0.lst - cluster_k.lst (for k clusters) for merging.\n" print txt_out with open( os.path.join(iparams.run_no, self.module_name, 'log.txt'), 'a') as f: f.write(txt_out) return #************************************************* #solve with Brehm & Diederichs - sample size n_sample_frames then bootstrap the rest txt_out = "Cluster images with B&D algorithms.\n" frame_files_sel, obs_list = self.get_observation_set( iparams, frame_files, iparams.isoform_cluster.n_sample_frames) frames = [(i, frame_files_sel[i], obs_list[i], obs_list) for i in range(len(frame_files_sel))] #calculate r print "Calculating R" calc_r_results = pool_map(iterable=frames, func=calculate_r_mproc, processes=iparams.n_processors) frame_files_sel = [] r_matrix = [] obs_list = [] for result in calc_r_results: if result: pickle_filename, r_set, obs = result frame_files_sel.append(pickle_filename) obs_list.append(obs) if len(r_matrix) == 0: r_matrix = r_set else: r_matrix = np.append(r_matrix, r_set, axis=0) #choose groups with best R print "Selecting frames with best R" i_mean_r = np.argsort(np.mean(r_matrix, axis=1))[::-1] r_matrix_sorted = r_matrix[i_mean_r] frame_files_sorted = np.array(frame_files_sel)[i_mean_r] obs_list_sorted = np.array(obs_list)[i_mean_r] frame_files_sel = [] obs_sel = [] for frame_file, r_set, obs in zip(frame_files_sorted, r_matrix_sorted, obs_list_sorted): if frame_file not in frame_files_sel: frame_files_sel.append(frame_file) obs_sel.append(obs) print frame_file, np.mean(r_set) if len(frame_files_sel ) >= iparams.isoform_cluster.n_selected_frames: print 'Found all %6.0f good frames' % ( len(frame_files_sel)) break #Recalculate r for the new selected list frames = [(i, frame_files_sel[i], obs_sel[i], obs_sel) for i in range(len(frame_files_sel))] print "Re-calculating R" calc_r_results = pool_map(iterable=frames, func=calculate_r_mproc, processes=iparams.n_processors) frame_files_sel = [] r_matrix = [] obs_list = [] for result in calc_r_results: if result: 
pickle_filename, r_set, obs = result frame_files_sel.append(pickle_filename) obs_list.append(obs) if len(r_matrix) == 0: r_matrix = r_set else: r_matrix = np.append(r_matrix, r_set, axis=0) print "Minimizing frame distance" isoch = isoform_cluster_handler() x_set = isoch.optimize(r_matrix, flag_plot=iparams.flag_plot) print "Clustering results" kmh = kmeans_handler() k = iparams.isoform_cluster.n_clusters centroids, labels = kmh.run(x_set, k, flag_plot=iparams.flag_plot) print "Get solution pickle and cluster files list" sol_pickle, cluster_files = isoch.assign_cluster(frame_files_sel, labels, k, \ os.path.join(iparams.run_no,self.module_name)) #if more frames found, merge the sample frames to get a reference set #that can be used for breaking the ambiguity. if n_frames > iparams.isoform_cluster.n_selected_frames: print "Assign cluster_id for the remaining images." old_iparams_data = iparams.data[:] miller_array_ref_set = [] from prime.command_line.postrefine import scale_frames, merge_frames for i in range(k): #generate a reference set from solved frames with open(cluster_files[i]) as f: frame_files_processed = f.read().split('\n')[:-1] scaled_pres_set = scale_frames( range(len(frame_files_processed)), frame_files_processed, iparams) mdh, txt_merge_out = merge_frames(scaled_pres_set, iparams, \ mtz_out_prefix=os.path.join(self.module_name,'cluster_'+str(i))) miller_array_ref_set.append(mdh.miller_array_merge) txt_out += txt_merge_out #setup a list of remaining frames frame_files_remain = [ frame for frame in frame_files if frame not in sol_pickle ] frame_files_remain_sel, obs_remain_sel_list = self.get_observation_set(iparams, \ frame_files_remain, len(frame_files_remain)) frames = [(i, frame_files_remain_sel[i], obs_remain_sel_list[i], iparams, miller_array_ref_set) for i in range(len(obs_remain_sel_list))] cc_results = pool_map(iterable=frames, func=solve_with_mtz_mproc, processes=iparams.n_processors) for result in cc_results: if result: pickle_filename, 
cluster_id = result sol_pickle[pickle_filename] = cluster_id iparams.data = old_iparams_data[:] #write out solution pickle write_out_solutions(iparams, sol_pickle) #write out text output txt = "Cluster images completed. Use cluster_0.lst - cluster_k.lst (for k clusters) for merging.\n" txt_out += txt print txt with open(os.path.join(iparams.run_no, self.module_name, 'log.txt'), 'a') as f: f.write(txt_out)
if (__name__ == "__main__"):
    # Stand-alone driver: for every integration pickle, derive the per-spot
    # geometry (predicted positions relative to the beam centre and their
    # angle) used further on in this script.
    uc_tol = 3
    ry, rz, re, rotx, roty = (0, 0, 0.008, 0, 0)
    flag_beam_divergence = False
    lambda_template = flex.double(range(-50,50,1))/1000

    #0 .read input parameters and frames (pickle files)
    data, hklrefin, pixel_size_mm, target_unit_cell, \
      d_min, d_max = read_input(args = sys.argv[1:])
    frame_files = read_pickles(data)

    # The reference file is the same for every frame, so read it only once.
    mxh = mx_handler()
    flag_hklisoin_found, miller_array_iso = mxh.get_miller_array_from_reflection_file(hklrefin)

    for pickle_filename in frame_files:
        with open(pickle_filename, "rb") as pickle_file:
            observations_pickle = pickle.load(pickle_file)
        pickle_filename_only = pickle_filename.split('/')[-1]

        observations = observations_pickle["observations"][0]

        #check if the uc is good
        flag_good_unit_cell = good_unit_cell(observations.unit_cell().parameters(), None, uc_tol, target_unit_cell=target_unit_cell)

        #update lambda_set
        lambda_set = lambda_template + observations_pickle["wavelength"]
        crystal_init_orientation = observations_pickle["current_orientation"][0]
        detector_distance_mm = observations_pickle['distance']
        mm_predictions = pixel_size_mm*(observations_pickle['mapped_predictions'][0])
        xbeam = observations_pickle["xbeam"]
        ybeam = observations_pickle["ybeam"]
        # atan2 of the two absolute offsets equals atan(|dx|/|dy|) whenever
        # dy != 0 and stays defined (pi/2, or 0 at the beam centre) when a
        # prediction has the same y as the beam.
        alpha_angle = flex.double([math.atan2(abs(pred[0]-xbeam), abs(pred[1]-ybeam)) \
                                   for pred in mm_predictions])
        spot_pred_x_mm = flex.double([pred[0]-xbeam for pred in mm_predictions])
        spot_pred_y_mm = flex.double([pred[1]-ybeam for pred in mm_predictions])
def get_results(self, finished_objects=None):
    """Sort finished image objects into categories and refresh run stats.

    Every finished object is removed from the ``unprocessed`` and
    ``not_processed`` lists. Failed objects are filed under the category
    named after their failure (spaces replaced by underscores) when that
    category exists. Objects without a failure are recorded as integrated
    and then used to update per-image stats, pointers, the cluster
    iterable, the combined observations file and the B-factor list.

    Args:
        finished_objects: objects to process. When empty or None they are
            fetched with ``self.info.get_finished_objects()``.

    Returns:
        True when at least one object had no failure and the statistics
        were updated; False when there was nothing to process or every
        object had failed.
    """
    if not finished_objects:
        finished_objects = self.info.get_finished_objects()
    if not finished_objects:
        return False

    final_objects = []

    # Start a fresh "unplotted" buffer with an empty list per known stat.
    self.info.unplotted_stats = {}
    for key in self.info.stats:
        self.info.unplotted_stats[key] = dict(lst=[])

    for obj in finished_objects:
        item = [obj.input_index, obj.img_path, obj.img_index]

        # This image is no longer pending.
        if item in self.info.unprocessed:
            self.info.unprocessed.remove(item)
        not_processed = self.info.categories["not_processed"][0]
        if item in not_processed:
            not_processed.remove(item)

        if obj.fail:
            key = obj.fail.replace(" ", "_")
            if key in self.info.categories:
                self.info.categories[key][0].append(item)
        else:
            self.info.categories["integrated"][0].append(obj.final["final"])
            self.info.final_objects.append(obj.obj_file)
            final_objects.append(obj)

        # Anything that did not fail with a "triage" failure is counted as
        # having diffraction.
        if not obj.fail or "triage" not in obj.fail:
            self.info.categories["have_diffraction"][0].append(obj.img_path)

    # Calculate processing stats from final objects
    if not final_objects:
        return False

    self.info.pixel_size = final_objects[0].final["pixel_size"]

    # Get observations from file; any failure to load is treated as
    # "no previous observations" (deliberate best effort).
    try:
        all_obs = ep.load(self.info.idx_file)
    except Exception:
        all_obs = None

    # Collect image processing stats
    for obj in final_objects:
        for key in self.info.stats:
            if key in obj.final:
                stat_tuple = (
                    obj.input_index,
                    obj.img_path,
                    obj.img_index,
                    obj.final[key],
                )
                self.info.stats[key]["lst"].append(stat_tuple)

                # add proc filepath info to 'pointers'
                pointer_dict = {
                    "img_file": obj.img_path,
                    "obj_file": obj.obj_file,
                    "img_index": obj.img_index,
                    "experiments": obj.eint_path,
                    "reflections": obj.rint_path,
                }
                self.info.pointers[str(obj.input_index)] = pointer_dict

                if key not in self.info.unplotted_stats:
                    self.info.unplotted_stats[key] = dict(lst=[])
                self.info.unplotted_stats[key]["lst"].append(stat_tuple)

        # Unit cells and space groups (i.e. cluster iterable)
        self.info.cluster_iterable.append(
            [
                float(obj.final["a"]),
                float(obj.final["b"]),
                float(obj.final["c"]),
                float(obj.final["alpha"]),
                float(obj.final["beta"]),
                float(obj.final["gamma"]),
                str(obj.final["sg"]),
            ]
        )

        # Get observations from this image: prefer the in-memory array and
        # fall back to the pickle file named by obj.final["final"].
        obs = None
        if "observations" in obj.final:
            obs = obj.final["observations"].as_non_anomalous_array()
        else:
            pickle_path = obj.final["final"]
            if os.path.isfile(pickle_path):
                try:
                    # Named image_pickle so the stdlib module name "pickle"
                    # is not shadowed inside this function.
                    image_pickle = ep.load(pickle_path)
                    obs = image_pickle["observations"][0].as_non_anomalous_array()
                except Exception as e:
                    print(
                        "IMAGE_PICKLE_ERROR for {}: {}".format(pickle_path, e)
                    )

        with util.Capturing():
            if obs:
                # Append observations to combined miller array
                obs = obs.expand_to_p1()
                if all_obs:
                    all_obs = all_obs.concatenate(
                        obs, assert_is_similar_symmetry=False
                    )
                else:
                    all_obs = obs

                # Get B-factor from this image; a RuntimeError is recorded
                # as a B-factor of 0.
                try:
                    mxh = mx_handler()
                    asu_contents = mxh.get_asu_contents(500)
                    observations_as_f = obs.as_amplitude_array()
                    observations_as_f.setup_binner(auto_binning=True)
                    wp = statistics.wilson_plot(
                        observations_as_f, asu_contents, e_statistics=True
                    )
                    b_factor = wp.wilson_b
                except RuntimeError as e:
                    b_factor = 0
                    print("B_FACTOR_ERROR: ", e)
                self.info.b_factors.append(b_factor)

    # Save collected observations to file
    if all_obs:
        ep.dump(self.info.idx_file, all_obs)

    # Calculate dataset stats
    for k in self.info.stats:
        entries = self.info.stats[k]["lst"]
        if not entries:
            # No image has reported this stat yet; summarising an empty
            # list would raise, so leave this entry untouched.
            continue
        # The stat value is the 4th field of every stat tuple.
        stat_list = tuple(entry[3] for entry in entries)
        stats = dict(
            lst=entries,
            median=np.median(stat_list).item(),
            mean=np.mean(stat_list).item(),
            std=np.std(stat_list).item(),
            max=np.max(stat_list).item(),
            min=np.min(stat_list).item(),
            cons=Counter(stat_list).most_common(1)[0][0],
        )
        self.info.stats[k].update(stats)

    return True