def run(self, args):
   #read inputs
   runh = run_handler()
   from prime.postrefine.mod_input import process_input
   iparams, txt_out_input = process_input(argv=args, flag_check_exist=False)
   iparams.flag_volume_correction = False
   if iparams.partiality_model == "Lognormal":
     iparams.voigt_nu = 0.008 #use voigt_nu as lognpdf zero parameter
   #read all result pickles
   try:
     DIR = iparams.run_no+'/pickles/'
     pickle_results = [pickle.load(open(DIR+fname, "rb")) for fname in os.listdir(DIR)]
     n_results = len(pickle_results)
   except Exception:
     print "Error reading input pickles."
     print "*VERSION UPGRADE NOTE* use prime.run instead of prime.postrefine to run all processes together."
     exit()
   #get reference file - look for n.mtz with n as maximum number.
   hklrefin = None
   if iparams.hklrefin is None:
     DIR = iparams.run_no+'/mtz/'
     file_no_list = [int(fname.split('.')[0]) for fname in os.listdir(DIR)]
     if len(file_no_list) > 0:
       hklrefin = DIR + str(max(file_no_list)) + '.mtz'
   else:
     hklrefin = iparams.hklrefin
    if hklrefin is None:
      print "No reference set found. Exit program"
      exit()
   print "Reference set:", hklrefin
   mxh = mx_handler()
   flag_hklrefin_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(hklrefin)
   #post-refinement
   avg_mode = 'weighted'
   #run command for post-refinement
   frames = [(i, pickle_results[i], iparams, miller_array_ref, avg_mode) for i in range(n_results)]
   inp_pickle = {'iparams':iparams, 'frames':frames}
   pickle.dump(inp_pickle, open(iparams.run_no+'/inputs/0.inp',"wb"))
   call(["prime._postrefine_frame", iparams.run_no+'/inputs/0.inp'])
   runh.check_done(iparams, n_results)
   print "Post-refinement completed. Run prime.merge for the merged reflection file."
Example #2
    def get_results(self, finished_objects=None):
        if not finished_objects:
            finished_objects = self.info.get_finished_objects()
            if not finished_objects:
                return False
        final_objects = []

        self.info.unplotted_stats = {}
        for key in self.info.stats:
            self.info.unplotted_stats[key] = dict(lst=[])

        for obj in finished_objects:
            item = [obj.input_index, obj.img_path, obj.img_index]
            if len(self.info.unprocessed) > 0 and item in self.info.unprocessed:
                self.info.unprocessed.remove(item)
            if (len(self.info.categories['not_processed'][0]) > 0
                    and item in self.info.categories['not_processed'][0]):
                self.info.categories['not_processed'][0].remove(item)

            if obj.fail:
                key = obj.fail.replace(' ', '_')
                if key in self.info.categories:
                    self.info.categories[key][0].append(item)
            else:
                self.info.categories['integrated'][0].append(
                    obj.final['final'])
                self.info.final_objects.append(obj.obj_file)
                final_objects.append(obj)

            if not obj.fail or 'triage' not in obj.fail:
                self.info.categories['have_diffraction'][0].append(
                    obj.img_path)

        # Calculate processing stats from final objects
        if final_objects:
            self.info.pixel_size = final_objects[0].final['pixel_size']

            # Get observations from file
            try:
                all_obs = ep.load(self.info.idx_file)
            except Exception:
                all_obs = None

            # Collect image processing stats
            for obj in final_objects:
                for key in self.info.stats:
                    if key in obj.final:
                        stat_tuple = (obj.input_index, obj.img_path,
                                      obj.img_index, obj.final[key])
                        self.info.stats[key]['lst'].append(stat_tuple)

                        if key not in self.info.unplotted_stats:
                            self.info.unplotted_stats[key] = dict(lst=[])
                        self.info.unplotted_stats[key]['lst'].append(
                            stat_tuple)

                # Unit cells and space groups (i.e. cluster iterable)
                self.info.cluster_iterable.append([
                    float(obj.final['a']),
                    float(obj.final['b']),
                    float(obj.final['c']),
                    float(obj.final['alpha']),
                    float(obj.final['beta']),
                    float(obj.final['gamma']),
                    str(obj.final['sg'])
                ])

                # Get observations from this image
                obs = None
                if 'observations' in obj.final:
                    obs = obj.final['observations'].as_non_anomalous_array()
                else:
                    pickle_path = obj.final['final']
                    if os.path.isfile(pickle_path):
                        try:
                            img_pickle = ep.load(pickle_path)  # renamed to avoid shadowing the pickle module
                            obs = img_pickle['observations'][0].as_non_anomalous_array()
                        except Exception as e:
                            print('IMAGE_PICKLE_ERROR for {}: {}'.format(
                                pickle_path, e))

                with util.Capturing():
                    if obs:
                        # Append observations to combined miller array
                        obs = obs.expand_to_p1()
                        if all_obs:
                            all_obs = all_obs.concatenate(
                                obs, assert_is_similar_symmetry=False)
                        else:
                            all_obs = obs

                        # Get B-factor from this image
                        try:
                            mxh = mx_handler()
                            asu_contents = mxh.get_asu_contents(500)
                            observations_as_f = obs.as_amplitude_array()
                            observations_as_f.setup_binner(auto_binning=True)
                            wp = statistics.wilson_plot(observations_as_f,
                                                        asu_contents,
                                                        e_statistics=True)
                            b_factor = wp.wilson_b
                        except RuntimeError as e:
                            b_factor = 0
                            print('B_FACTOR_ERROR: ', e)
                        self.info.b_factors.append(b_factor)

            # Save collected observations to file
            if all_obs:
                ep.dump(self.info.idx_file, all_obs)

            # Calculate dataset stats
            for k in self.info.stats:
                stat_list = list(zip(*self.info.stats[k]['lst']))[3]  # value column of (input_index, img_path, img_index, value)
                stats = dict(lst=self.info.stats[k]['lst'],
                             median=np.median(stat_list),
                             mean=np.mean(stat_list),
                             std=np.std(stat_list),
                             max=np.max(stat_list),
                             min=np.min(stat_list),
                             cons=Counter(stat_list).most_common(1)[0][0])
                self.info.stats[k].update(stats)
            return True
        else:
            return False
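
The dataset-stats loop at the end reduces each stat's list of (input_index, img_path, img_index, value) tuples to summary numbers. A self-contained sketch of that reduction; the sample tuples are invented:

import numpy as np
from collections import Counter

def summarize(stat_tuples):
    values = list(zip(*stat_tuples))[3]  # value column of each 4-tuple
    return dict(median=np.median(values), mean=np.mean(values),
                std=np.std(values), max=np.max(values), min=np.min(values),
                cons=Counter(values).most_common(1)[0][0])  # modal value

print(summarize([(0, 'a.img', 0, 1.2), (1, 'b.img', 0, 1.5), (2, 'c.img', 0, 1.5)]))
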
 def run(self, args):
   #read inputs
   from prime.postrefine.mod_input import process_input, read_pickles
   iparams, txt_out_input = process_input(args)
   print txt_out_input
   f = open(iparams.run_no+'/log.txt', 'w')
   f.write(txt_out_input)
   f.close()
   #if solution pickle is given, return the file name
   if iparams.indexing_ambiguity.index_basis_in is not None:
     if iparams.indexing_ambiguity.index_basis_in.endswith('.pickle'):
       return iparams.indexing_ambiguity.index_basis_in, iparams
   #read all integration pickles
   frame_files = read_pickles(iparams.data)
   n_frames = len(frame_files)
   if n_frames == 0:
     print "No integration pickle found. Exit program."
     return None, iparams
   #exit if no problem
   if self.should_terminate(iparams, frame_files[0]):
     print "No indexing ambiguity problem. Set index_ambiguity.mode = Forced and assigned_basis = list of basis formats to solve pseudo-twinning problem."
     return None, iparams
   #continue with (Auto - alt>1, find solution), (Auto - alt>1, mtz)
   #(Forced - assigned_basis, mtz), (Forced - assigned_basis, find solution)
   #*************************************************
   #if mtz file is given, use it to solve the problem
   sol_fname = iparams.run_no+'/index_ambiguity/solution_pickle.pickle'
   if iparams.indexing_ambiguity.index_basis_in is not None:
     if iparams.indexing_ambiguity.index_basis_in.endswith('.mtz'):
       mxh = mx_handler()
       flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(iparams.indexing_ambiguity.index_basis_in)
        if not flag_ref_found:
          print "Reference mtz file not found. Set indexing_ambiguity.index_basis_in = None to enable automatic generation of the solutions."
          return None, iparams
       else:
         frames = [(i, frame_files[i], iparams, miller_array_ref) for i in range(n_frames)]
         cc_results = pool_map(
           iterable=frames,
           func=solve_with_mtz_mproc,
           processes=iparams.n_processors)
         sol_pickle = {}
         for result in cc_results:
           pickle_filename, index_basis = result
           sol_pickle[pickle_filename] = index_basis
         pickle.dump(sol_pickle, open(sol_fname,"wb"))
         return sol_fname, iparams
   #*************************************************
   #solve with Brehm & Diederichs - sample size n_sample_frames then bootstrap the rest
   frames = [(i, frame_files[i], iparams) for i in random.sample(range(n_frames), iparams.indexing_ambiguity.n_sample_frames)]
   #get observations list
   print "Reading observations"
   alt_dict_results = pool_map(
         iterable=frames,
         func=get_obs_mproc,
         processes=iparams.n_processors)
   frame_dup_files = []
   frame_keys = []
   obs_list = []
   for result in alt_dict_results:
     alt_dict, pickle_filename = result
     if alt_dict is not None:
       for key in alt_dict.keys():
         frame_dup_files.append(pickle_filename)
         frame_keys.append(key)
         obs_list.append(alt_dict[key])
   frames = [(i, frame_dup_files[i], frame_keys[i], obs_list[i], obs_list) for i in range(len(frame_dup_files))]
   #calculate r
   print "Calculating R"
   calc_r_results = pool_map(
         iterable=frames,
         func=calculate_r_mproc,
         processes=iparams.n_processors)
   frame_dup_files = []
   frame_keys = []
   r_matrix = []
   for result in calc_r_results:
     if result is not None:
       pickle_filename, index_basis, r_set = result
       frame_dup_files.append(pickle_filename)
       frame_keys.append(index_basis)
       if len(r_matrix) == 0:
         r_matrix = r_set
       else:
         r_matrix = np.append(r_matrix, r_set, axis=0)
   #choose groups with best CC
   print "Selecting frames with best R"
   i_mean_r = np.argsort(np.mean(r_matrix, axis=1))[::-1]
   r_matrix_sorted = r_matrix[i_mean_r]
   frame_dup_files_sorted = np.array(frame_dup_files)[i_mean_r]
   frame_keys_sorted = np.array(frame_keys)[i_mean_r]
   frame_dup_files_sel = []
   for frame_file, frame_key, r_set in zip(frame_dup_files_sorted, frame_keys_sorted, r_matrix_sorted):
     if frame_file not in frame_dup_files_sel:
       frame_dup_files_sel.append(frame_file)
       print frame_file, frame_key, np.mean(r_set)
       if len(frame_dup_files_sel) >= iparams.indexing_ambiguity.n_selected_frames:
         print 'Found all %6.0f good frames'%(len(frame_dup_files_sel))
         break
   ##
   #rebuild observations and r_matrix
   frames = [(i, frame_dup_files_sel[i], iparams) for i in range(len(frame_dup_files_sel))]
   #get observations list
   print "Re-reading observations"
   alt_dict_results = pool_map(
         iterable=frames,
         func=get_obs_mproc,
         processes=iparams.n_processors)
   frame_dup_files = []
   frame_keys = []
   obs_list = []
   for result in alt_dict_results:
     alt_dict, pickle_filename = result
     if alt_dict is not None:
       for key in alt_dict.keys():
         frame_dup_files.append(pickle_filename)
         frame_keys.append(key)
         obs_list.append(alt_dict[key])
   frames = [(i, frame_dup_files[i], frame_keys[i], obs_list[i], obs_list) for i in range(len(frame_dup_files))]
   #calculate r
   print "Re-calculating R"
   calc_r_results = pool_map(
         iterable=frames,
         func=calculate_r_mproc,
         processes=iparams.n_processors)
   frame_dup_files = []
   frame_keys = []
   r_matrix = []
   for result in calc_r_results:
     if result is not None:
       pickle_filename, index_basis, r_set = result
       frame_dup_files.append(pickle_filename)
       frame_keys.append(index_basis)
       if len(r_matrix) == 0:
         r_matrix = r_set
       else:
         r_matrix = np.append(r_matrix, r_set, axis=0)
   print "Minimizing frame distance"
   idah = indamb_handler()
   x_set = idah.optimize(r_matrix, flag_plot=iparams.flag_plot)
   x_pickle = {'frame_dup_files':frame_dup_files, 'frame_keys':frame_keys, \
     'r_matrix':r_matrix, 'x_set':x_set}
   pickle.dump(x_pickle, open(iparams.run_no+'/index_ambiguity/x.out',"wb"))
   print "Clustering results"
   kmh = kmeans_handler()
   k = 2**(len(idah.get_observations(frame_dup_files[0], iparams))-1)
   centroids, labels = kmh.run(x_set, k, flag_plot=iparams.flag_plot)
   print "Get solution pickle"
   sample_fname = iparams.run_no+'/index_ambiguity/sample.lst'
   sol_pickle = idah.assign_basis(frame_dup_files, frame_keys, labels, k, sample_fname)
   pickle.dump(sol_pickle, open(sol_fname,"wb"))
   #if more frames found, merge the sample frames to get a reference set
   #that can be used for breaking the ambiguity.
   if n_frames > iparams.indexing_ambiguity.n_selected_frames:
     print "Breaking the indexing ambiguity for the remaining images."
     old_iparams_data = iparams.data[:]
     iparams.data = [sample_fname]
     iparams.indexing_ambiguity.index_basis_in = sol_fname
     grh = genref_handler()
     grh.run_by_params(iparams)
     mh = merge_handler()
     mh.run_by_params(iparams)
     DIR = iparams.run_no+'/mtz/'
     file_no_list = [int(fname.split('.')[0]) for fname in os.listdir(DIR)]
     if len(file_no_list) > 0:
       hklref_indamb = DIR + str(max(file_no_list)) + '.mtz'
       print "Bootstrap reference reflection set:", hklref_indamb
       #setup a list of remaining frames
       frame_files_remain = []
       for frame in frame_files:
         if frame not in sol_pickle:
           frame_files_remain.append(frame)
       #determine index basis
       mxh = mx_handler()
       flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(hklref_indamb)
       frames = [(i, frame_files_remain[i], iparams, miller_array_ref) for i in range(len(frame_files_remain))]
       cc_results = pool_map(
         iterable=frames,
         func=solve_with_mtz_mproc,
         processes=iparams.n_processors)
       for result in cc_results:
         pickle_filename, index_basis = result
         sol_pickle[pickle_filename] = index_basis
     iparams.data = old_iparams_data[:]
   #write out solution pickle
   pickle.dump(sol_pickle, open(sol_fname,"wb"))
   #write out text output
   txt_out = "Solving indexing ambiguity complete. Solution file saved to "+sol_fname+"\n"
   f = open(iparams.run_no+'/log.txt', 'a')
   f.write(txt_out)
   f.close()
   return sol_fname, iparams
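
The run above repeatedly fans work out with pool_map and folds the (filename, result) pairs back into plain Python containers. A hedged stand-alone sketch of the same fan-out/collect pattern, written with the standard multiprocessing module instead of prime's pool_map helper; the worker and file names are illustrative:

import multiprocessing

def score_frame(frame):
    # stand-in worker; the real workers correlate observations between frames
    i, filename = frame
    return filename, 0.1 * i

if __name__ == '__main__':
    frames = [(i, 'frame_%04d.pickle' % i) for i in range(8)]
    pool = multiprocessing.Pool(processes=4)
    results = pool.map(score_frame, frames)
    pool.close()
    pool.join()
    scores = dict(results)  # filename -> result, like the sol_pickle dict above
    print(scores)
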
Example #5
 def run_by_params(self, iparams):
     iparams.flag_volume_correction = False
     if iparams.partiality_model == "Lognormal":
         iparams.voigt_nu = 0.008  #use voigt_nu as lognpdf zero parameter
     #read all result pickles
     try:
         DIR = iparams.run_no + '/pickles/'
         pickle_results = [
             pickle.load(open(DIR + fname, "rb"))
             for fname in os.listdir(DIR)
         ]
         file_no_results = [
             int(fname.split('.')[0]) for fname in os.listdir(DIR)
         ]
         n_results = len(pickle_results)
     except Exception:
         print "Error reading input pickles."
         print "*VERSION UPGRADE NOTE* use prime.run instead of prime.postrefine to run all processes together."
         exit()
     #get reference file - look for n.mtz with n as maximum number.
     hklrefin = None
     if iparams.hklrefin is None:
         DIR = iparams.run_no + '/mtz/'
         file_no_list = [
             int(fname.split('.')[0]) for fname in os.listdir(DIR)
         ]
         if len(file_no_list) > 0:
             hklrefin = DIR + str(max(file_no_list)) + '.mtz'
     else:
         hklrefin = iparams.hklrefin
      if hklrefin is None:
          print "No reference set found. Exit program"
          exit()
     print "Reference set:", hklrefin, " No. of images:", n_results
     mxh = mx_handler()
     flag_hklrefin_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(
         hklrefin)
     #post-refinement
     avg_mode = 'weighted'
     #run command for post-refinement
     if iparams.queue.mode is None:
         frames = [(file_no_results[i], pickle_results[i], iparams,
                    miller_array_ref, avg_mode) for i in range(n_results)]
         inp_pickle = {'iparams': iparams, 'frames': frames}
         pickle.dump(inp_pickle, open(iparams.run_no + '/inputs/0.inp',
                                      "wb"))
         call(["prime._postrefine_frame", iparams.run_no + '/inputs/0.inp'])
     else:
         #run on n_nodes
         n_imgs_per_node = int(round(n_results / iparams.queue.n_nodes))
         for i_node in range(iparams.queue.n_nodes):
             start_frame = i_node * n_imgs_per_node
             if i_node < iparams.queue.n_nodes - 1:
                 end_frame = start_frame + n_imgs_per_node
             else:
                 end_frame = n_results
             frames = [(i, pickle_results[i], iparams, miller_array_ref,
                        avg_mode) for i in range(start_frame, end_frame)]
             inp_pickle = {'iparams': iparams, 'frames': frames}
             pickle.dump(
                 inp_pickle,
                 open(iparams.run_no + '/inputs/' + str(i_node) + '.inp',
                      "wb"))
             call([
                 "bsub", "-q", iparams.queue.qname, "-o",
                 iparams.run_no + "/qout/qout_pr.txt",
                 "prime._postrefine_frame",
                 iparams.run_no + "/inputs/" + str(i_node) + ".inp"
             ])
     runh = run_handler()
     runh.check_done(iparams, n_results)
     print "Post-refinement completed. Run prime.merge for the merged reflection file."
Example #6
def run(argv):
    #capture starting time
    time_global_start = datetime.now()
    import logging
    logging.captureWarnings(True)
    formatter = logging.Formatter('%(asctime)s\t%(levelname)s\t%(message)s')
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.ERROR)
    console_handler.setFormatter(formatter)
    logging.getLogger().addHandler(console_handler)
    logging.getLogger('py.warnings').addHandler(console_handler)
    logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s',
                        level=logging.DEBUG)
    #0.1 determine indexing ambiguity and setup iparams
    txt_indexing_ambiguity = "Determine if there is an indexing ambiguity on the dataset"
    print txt_indexing_ambiguity
    idah = indexing_ambiguity_handler()
    sol_fname, iparams = idah.run(argv)
    if sol_fname is None:
        print "No ambiguity."
        txt_indexing_ambiguity += "\nNo ambiguity."
    else:
        print "Ambiguity is solved. Solution file was saved to :" + str(
            sol_fname)
        txt_indexing_ambiguity += "Ambiguity is solved. Solution file was saved to :" + str(
            sol_fname)
        iparams.indexing_ambiguity.index_basis_in = sol_fname
    #0.2 setup parameters
    iparams.flag_volume_correction = False
    if iparams.partiality_model == "Lognormal":
        iparams.voigt_nu = 0.008  #use voigt_nu as lognpdf zero parameter
    #0.3 read frames
    frame_files = read_pickles(iparams.data)
    frames = range(len(frame_files))
    #1. prepare reference miller array
    txt_merge_mean = 'Generating a reference set (will not be used if hklrefin is set)'
    print txt_merge_mean
    #Always generate the mean-intensity scaled set.
    scaled_pres_set = scale_frames(frames, frame_files, iparams)
    mdh, _txt_merge_mean = merge_frames(scaled_pres_set, iparams)
    miller_array_ref = mdh.miller_array_merge
    txt_merge_mean += '\n' + _txt_merge_mean
    if not iparams.n_postref_cycle:
        with open(iparams.run_no + '/log.txt', 'a') as f:
            f.write(txt_indexing_ambiguity + txt_merge_mean)
        raise Usage(
            "No. of post-refinement cycle was set to 0. Exit without post-refinement."
        )
    if iparams.hklrefin is not None:
        mxh = mx_handler()
        _, miller_array_ref = mxh.get_miller_array_from_reflection_file(
            iparams.hklrefin)
    if miller_array_ref is None:
        raise Usage(
            "Problem with the assigned reference set. Try setting hklrefin=None and rerun the program."
        )
    #2. Post-refinement
    txt_merge_postref = ''
    postref_pres_set = [None] * len(frames)
    avg_mode = 'weighted'
    for i_iter in xrange(iparams.n_postref_cycle):
        if i_iter == (iparams.n_postref_cycle - 1): avg_mode = 'final'
        postref_good_pres_set, postref_pres_set, _txt_merge_postref = postrefine_frames(
            i_iter, frames, frame_files, iparams, postref_pres_set,
            miller_array_ref, avg_mode)
        if postref_good_pres_set:
            mdh, _txt_merge_postref = merge_frames(
                postref_good_pres_set,
                iparams,
                avg_mode=avg_mode,
                mtz_out_prefix='postref_cycle_' + str(i_iter + 1))
            miller_array_ref = mdh.miller_array_merge
            txt_merge_postref += _txt_merge_postref
        else:
            raise Usage(
                "Problem with post-refinement. No images refined. Please check your input file."
            )
    #3. collect calculation time
    time_global_end = datetime.now()
    time_global_spent = time_global_end - time_global_start
    txt_out_time_spent = 'Total calculation time: '+'{0:.2f}'.format(time_global_spent.seconds)+ \
        ' seconds\nFinished: '+time_global_end.strftime("%A %d. %B %Y %H:%M:%S")+'\n'
    print txt_out_time_spent
    txt_out = txt_indexing_ambiguity + txt_merge_mean + txt_merge_postref + txt_out_time_spent
    with open(os.path.join(iparams.run_no, 'log.txt'), 'a') as f:
        f.write(txt_out)
    with open(os.path.join(iparams.run_no, '.done'), 'w') as f:
        f.write('Done')
    return mdh
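
The main loop alternates post-refinement against the current reference with re-merging to produce the next reference, switching to 'final' averaging on the last cycle. A toy sketch of that control flow; refine and merge here are numeric stand-ins, not prime's postrefine_frames/merge_frames:

def refine(frame, reference, avg_mode):
    return 0.5 * (frame + reference)            # toy stand-in for post-refinement

def merge(refined, avg_mode):
    return sum(refined) / float(len(refined))   # toy stand-in for merging

def run_cycles(frames, reference, n_cycles):
    for i_iter in range(n_cycles):
        avg_mode = 'final' if i_iter == n_cycles - 1 else 'weighted'
        refined = [refine(f, reference, avg_mode) for f in frames]
        reference = merge(refined, avg_mode)    # next cycle refines against this
    return reference

print(run_cycles([1.0, 2.0, 3.0], 2.0, n_cycles=3))
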
Example #7
if (__name__ == "__main__"):
    uc_tol = 3
    ry, rz, re, rotx, roty = (0, 0, 0.008, 0, 0)
    flag_beam_divergence = False
    lambda_template = flex.double(range(-50, 50, 1)) / 1000
    #0. read input parameters and frames (pickle files)
    data, hklrefin, pixel_size_mm, target_unit_cell, \
      d_min, d_max = read_input(args = sys.argv[1:])
    frame_files = read_pickles(data)
    for pickle_filename in frame_files:
        observations_pickle = read_frame(pickle_filename)
        pickle_filename_only = pickle_filename.split('/')[-1]
        mxh = mx_handler()
        flag_hklisoin_found, miller_array_iso = mxh.get_miller_array_from_reflection_file(
            hklrefin)
        observations = observations_pickle["observations"][0]
        #check if the uc is good
        flag_good_unit_cell = good_unit_cell(
            observations.unit_cell().parameters(),
            None,
            uc_tol,
            target_unit_cell=target_unit_cell)
        #update lambda_set
        lambda_set = lambda_template + observations_pickle["wavelength"]
        crystal_init_orientation = observations_pickle["current_orientation"][
            0]
        detector_distance_mm = observations_pickle['distance']
        mm_predictions = pixel_size_mm * (
            observations_pickle['mapped_predictions'][0])
Example #8
    def run(self, args):
        #read inputs
        from prime.postrefine.mod_input import process_input, read_pickles
        iparams, txt_out_input = process_input(args)
        print txt_out_input
        with open(os.path.join(iparams.run_no, self.module_name, 'log.txt'),
                  'w') as f:
            f.write(txt_out_input)
        #read all integration pickles
        frame_files = read_pickles(iparams.data)
        n_frames = len(frame_files)
        if n_frames == 0:
            print "No integration pickle found. Exit program."
            return None, iparams
        #start
        if iparams.isoform_cluster.isorefin:
            #get collection of iso. ref. reflection set.
            mxh = mx_handler()
            miller_array_ref_set = []
            for isorefin in iparams.isoform_cluster.isorefin:
                flag_ref_found, miller_array_ref = mxh.get_miller_array_from_reflection_file(
                    isorefin)
                if flag_ref_found:
                    miller_array_ref_set.append(miller_array_ref)
            #get observation list
            frame_files_sel, obs_list = self.get_observation_set(
                iparams, frame_files, n_frames)
            if miller_array_ref_set:
                frames = [(i, frame_files_sel[i], obs_list[i], iparams,
                           miller_array_ref_set) for i in range(len(obs_list))]
                cc_results = pool_map(iterable=frames,
                                      func=solve_with_mtz_mproc,
                                      processes=iparams.n_processors)
                sol_pickle = {}
                for result in cc_results:
                    pickle_filename, cluster_id = result
                    sol_pickle[pickle_filename] = cluster_id
                write_out_solutions(iparams, sol_pickle)
                txt_out = "Cluster images with given " + str(
                    len(miller_array_ref_set)
                ) + " mtz files completed. Use cluster_0.lst - cluster_k.lst (for k clusters) for merging.\n"
                print txt_out
                with open(
                        os.path.join(iparams.run_no, self.module_name,
                                     'log.txt'), 'a') as f:
                    f.write(txt_out)
            return

        #*************************************************
        #solve with Brehm & Diederichs - sample size n_sample_frames then bootstrap the rest
        txt_out = "Cluster images with B&D algorithms.\n"
        frame_files_sel, obs_list = self.get_observation_set(
            iparams, frame_files, iparams.isoform_cluster.n_sample_frames)
        frames = [(i, frame_files_sel[i], obs_list[i], obs_list)
                  for i in range(len(frame_files_sel))]
        #calculate r
        print "Calculating R"
        calc_r_results = pool_map(iterable=frames,
                                  func=calculate_r_mproc,
                                  processes=iparams.n_processors)
        frame_files_sel = []
        r_matrix = []
        obs_list = []
        for result in calc_r_results:
            if result:
                pickle_filename, r_set, obs = result
                frame_files_sel.append(pickle_filename)
                obs_list.append(obs)
                if len(r_matrix) == 0:
                    r_matrix = r_set
                else:
                    r_matrix = np.append(r_matrix, r_set, axis=0)
        #choose groups with best R
        print "Selecting frames with best R"
        i_mean_r = np.argsort(np.mean(r_matrix, axis=1))[::-1]
        r_matrix_sorted = r_matrix[i_mean_r]
        frame_files_sorted = np.array(frame_files_sel)[i_mean_r]
        obs_list_sorted = np.array(obs_list)[i_mean_r]
        frame_files_sel = []
        obs_sel = []
        for frame_file, r_set, obs in zip(frame_files_sorted, r_matrix_sorted,
                                          obs_list_sorted):
            if frame_file not in frame_files_sel:
                frame_files_sel.append(frame_file)
                obs_sel.append(obs)
                print frame_file, np.mean(r_set)
                if len(frame_files_sel) >= iparams.isoform_cluster.n_selected_frames:
                    print 'Found all %6.0f good frames' % (len(frame_files_sel))
                    break
        #Recalculate r for the new selected list
        frames = [(i, frame_files_sel[i], obs_sel[i], obs_sel)
                  for i in range(len(frame_files_sel))]
        print "Re-calculating R"
        calc_r_results = pool_map(iterable=frames,
                                  func=calculate_r_mproc,
                                  processes=iparams.n_processors)
        frame_files_sel = []
        r_matrix = []
        obs_list = []
        for result in calc_r_results:
            if result:
                pickle_filename, r_set, obs = result
                frame_files_sel.append(pickle_filename)
                obs_list.append(obs)
                if len(r_matrix) == 0:
                    r_matrix = r_set
                else:
                    r_matrix = np.append(r_matrix, r_set, axis=0)
        print "Minimizing frame distance"
        isoch = isoform_cluster_handler()
        x_set = isoch.optimize(r_matrix, flag_plot=iparams.flag_plot)
        print "Clustering results"
        kmh = kmeans_handler()
        k = iparams.isoform_cluster.n_clusters
        centroids, labels = kmh.run(x_set, k, flag_plot=iparams.flag_plot)
        print "Get solution pickle and cluster files list"
        sol_pickle, cluster_files = isoch.assign_cluster(frame_files_sel, labels, k, \
            os.path.join(iparams.run_no,self.module_name))
        #if more frames found, merge the sample frames to get a reference set
        #that can be used for breaking the ambiguity.
        if n_frames > iparams.isoform_cluster.n_selected_frames:
            print "Assign cluster_id for the remaining images."
            old_iparams_data = iparams.data[:]
            miller_array_ref_set = []
            from prime.command_line.postrefine import scale_frames, merge_frames
            for i in range(k):
                #generate a reference set from solved frames
                with open(cluster_files[i]) as f:
                    frame_files_processed = f.read().split('\n')[:-1]
                scaled_pres_set = scale_frames(
                    range(len(frame_files_processed)), frame_files_processed,
                    iparams)
                mdh, txt_merge_out = merge_frames(scaled_pres_set, iparams, \
                    mtz_out_prefix=os.path.join(self.module_name,'cluster_'+str(i)))
                miller_array_ref_set.append(mdh.miller_array_merge)
                txt_out += txt_merge_out
            #setup a list of remaining frames
            frame_files_remain = [
                frame for frame in frame_files if frame not in sol_pickle
            ]
            frame_files_remain_sel, obs_remain_sel_list = self.get_observation_set(iparams, \
                frame_files_remain, len(frame_files_remain))
            frames = [(i, frame_files_remain_sel[i], obs_remain_sel_list[i],
                       iparams, miller_array_ref_set)
                      for i in range(len(obs_remain_sel_list))]
            cc_results = pool_map(iterable=frames,
                                  func=solve_with_mtz_mproc,
                                  processes=iparams.n_processors)
            for result in cc_results:
                if result:
                    pickle_filename, cluster_id = result
                    sol_pickle[pickle_filename] = cluster_id
            iparams.data = old_iparams_data[:]
        #write out solution pickle
        write_out_solutions(iparams, sol_pickle)
        #write out text output
        txt = "Cluster images completed. Use cluster_0.lst - cluster_k.lst (for k clusters) for merging.\n"
        txt_out += txt
        print txt
        with open(os.path.join(iparams.run_no, self.module_name, 'log.txt'),
                  'a') as f:
            f.write(txt_out)

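
kmeans_handler above clusters the embedded x_set coordinates into k groups. A self-contained toy version of that step (plain Lloyd's algorithm with numpy, not prime's implementation; the sample data are synthetic):

import numpy as np

def kmeans(x, k, n_iter=20, seed=0):
    rng = np.random.RandomState(seed)
    centroids = x[rng.choice(len(x), k, replace=False)]
    for _ in range(n_iter):
        # assign each point to its nearest centroid
        d = np.linalg.norm(x[:, None, :] - centroids[None, :, :], axis=2)
        labels = d.argmin(axis=1)
        # move each centroid to the mean of its members
        for j in range(k):
            if np.any(labels == j):
                centroids[j] = x[labels == j].mean(axis=0)
    return centroids, labels

x_set = np.vstack([np.random.RandomState(1).randn(20, 2),
                   np.random.RandomState(2).randn(20, 2) + 5.0])
centroids, labels = kmeans(x_set, k=2)
print(labels)
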
if (__name__ == "__main__"):
  uc_tol = 3
  ry, rz, re, rotx, roty = (0, 0, 0.008, 0, 0)
  flag_beam_divergence = False
  lambda_template = flex.double(range(-50,50,1))/1000
  #0. read input parameters and frames (pickle files)
  data, hklrefin, pixel_size_mm, target_unit_cell, \
    d_min, d_max = read_input(args = sys.argv[1:])
  frame_files = read_pickles(data)
  for pickle_filename in frame_files:
    observations_pickle = pickle.load(open(pickle_filename,"rb"))
    pickle_filename_only = pickle_filename.split('/')[-1]
    mxh = mx_handler()
    flag_hklisoin_found, miller_array_iso = mxh.get_miller_array_from_reflection_file(hklrefin)
    observations = observations_pickle["observations"][0]
    #check if the uc is good
    flag_good_unit_cell = good_unit_cell(observations.unit_cell().parameters(), None, uc_tol, target_unit_cell=target_unit_cell)
    #update lambda_set
    lambda_set = lambda_template + observations_pickle["wavelength"]
    crystal_init_orientation = observations_pickle["current_orientation"][0]
    detector_distance_mm = observations_pickle['distance']
    mm_predictions = pixel_size_mm*(observations_pickle['mapped_predictions'][0])
    xbeam = observations_pickle["xbeam"]
    ybeam = observations_pickle["ybeam"]
    alpha_angle = flex.double([math.atan(abs(pred[0]-xbeam)/abs(pred[1]-ybeam)) \
                                   for pred in mm_predictions])
    spot_pred_x_mm = flex.double([pred[0]-xbeam for pred in mm_predictions])
    spot_pred_y_mm = flex.double([pred[1]-ybeam for pred in mm_predictions])
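
The loop above converts pixel predictions to mm and derives each spot's angle from the beam center. A plain-Python sketch of that geometry, using lists instead of flex arrays; the sample numbers are invented:

import math

def spot_geometry(predictions_px, pixel_size_mm, xbeam_mm, ybeam_mm):
    out = []
    for px, py in predictions_px:
        x_mm = px * pixel_size_mm
        y_mm = py * pixel_size_mm
        # angle of the spot relative to the beam center, as in alpha_angle above
        alpha = math.atan(abs(x_mm - xbeam_mm) / abs(y_mm - ybeam_mm))
        out.append((x_mm - xbeam_mm, y_mm - ybeam_mm, alpha))
    return out

print(spot_geometry([(1200, 1400)], 0.172, 105.0, 105.0))
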
Example #10
    def get_results(self, finished_objects=None):
        if not finished_objects:
            finished_objects = self.info.get_finished_objects()
            if not finished_objects:
                return False
        final_objects = []

        self.info.unplotted_stats = {}
        for key in self.info.stats:
            self.info.unplotted_stats[key] = dict(lst=[])

        for obj in finished_objects:
            item = [obj.input_index, obj.img_path, obj.img_index]
            if len(self.info.unprocessed) > 0 and item in self.info.unprocessed:
                self.info.unprocessed.remove(item)
            if (
                len(self.info.categories["not_processed"][0]) > 0
                and item in self.info.categories["not_processed"][0]
            ):
                self.info.categories["not_processed"][0].remove(item)

            if obj.fail:
                key = obj.fail.replace(" ", "_")
                if key in self.info.categories:
                    self.info.categories[key][0].append(item)
            else:
                self.info.categories["integrated"][0].append(obj.final["final"])
                self.info.final_objects.append(obj.obj_file)
                final_objects.append(obj)

            if not obj.fail or "triage" not in obj.fail:
                self.info.categories["have_diffraction"][0].append(obj.img_path)

        # Calculate processing stats from final objects
        if final_objects:
            self.info.pixel_size = final_objects[0].final["pixel_size"]

            # Get observations from file
            try:
                all_obs = ep.load(self.info.idx_file)
            except Exception:
                all_obs = None

            # Collect image processing stats
            for obj in final_objects:
                for key in self.info.stats:
                    if key in obj.final:
                        stat_tuple = (
                            obj.input_index,
                            obj.img_path,
                            obj.img_index,
                            obj.final[key],
                        )
                        self.info.stats[key]["lst"].append(stat_tuple)

                        # add proc filepath info to 'pointers'
                        pointer_dict = {
                            "img_file": obj.img_path,
                            "obj_file": obj.obj_file,
                            "img_index": obj.img_index,
                            "experiments": obj.eint_path,
                            "reflections": obj.rint_path,
                        }
                        self.info.pointers[str(obj.input_index)] = pointer_dict

                        if key not in self.info.unplotted_stats:
                            self.info.unplotted_stats[key] = dict(lst=[])
                        self.info.unplotted_stats[key]["lst"].append(stat_tuple)

                # Unit cells and space groups (i.e. cluster iterable)
                self.info.cluster_iterable.append(
                    [
                        float(obj.final["a"]),
                        float(obj.final["b"]),
                        float(obj.final["c"]),
                        float(obj.final["alpha"]),
                        float(obj.final["beta"]),
                        float(obj.final["gamma"]),
                        str(obj.final["sg"]),
                    ]
                )

                # Get observations from this image
                obs = None
                if "observations" in obj.final:
                    obs = obj.final["observations"].as_non_anomalous_array()
                else:
                    pickle_path = obj.final["final"]
                    if os.path.isfile(pickle_path):
                        try:
                            img_pickle = ep.load(pickle_path)  # renamed to avoid shadowing the pickle module
                            obs = img_pickle["observations"][0].as_non_anomalous_array()
                        except Exception as e:
                            print(
                                "IMAGE_PICKLE_ERROR for {}: {}".format(pickle_path, e)
                            )

                with util.Capturing():
                    if obs:
                        # Append observations to combined miller array
                        obs = obs.expand_to_p1()
                        if all_obs:
                            all_obs = all_obs.concatenate(
                                obs, assert_is_similar_symmetry=False
                            )
                        else:
                            all_obs = obs

                        # Get B-factor from this image
                        try:
                            mxh = mx_handler()
                            asu_contents = mxh.get_asu_contents(500)
                            observations_as_f = obs.as_amplitude_array()
                            observations_as_f.setup_binner(auto_binning=True)
                            wp = statistics.wilson_plot(
                                observations_as_f, asu_contents, e_statistics=True
                            )
                            b_factor = wp.wilson_b
                        except RuntimeError as e:
                            b_factor = 0
                            print("B_FACTOR_ERROR: ", e)
                        self.info.b_factors.append(b_factor)

            # Save collected observations to file
            if all_obs:
                ep.dump(self.info.idx_file, all_obs)

            # Calculate dataset stats
            for k in self.info.stats:
                stat_list = list(zip(*self.info.stats[k]["lst"]))[3]
                stats = dict(
                    lst=self.info.stats[k]["lst"],
                    median=np.median(stat_list).item(),
                    mean=np.mean(stat_list).item(),
                    std=np.std(stat_list).item(),
                    max=np.max(stat_list).item(),
                    min=np.min(stat_list).item(),
                    cons=Counter(stat_list).most_common(1)[0][0],
                )
                self.info.stats[k].update(stats)
            return True
        else:
            return False
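
The B-factor step above fits a Wilson plot, ln<I> against (sin theta / lambda)^2, whose slope is -2B. A hedged numeric sketch of that estimate on synthetic bin data, rather than cctbx's statistics.wilson_plot:

import numpy as np

def wilson_b(stol_sq, mean_i):
    # ln<I> = ln K - 2B * (sin(theta)/lambda)^2, so B = -slope / 2
    slope, _ = np.polyfit(stol_sq, np.log(mean_i), 1)
    return -slope / 2.0

stol_sq = np.linspace(0.01, 0.25, 10)            # (sin(theta)/lambda)^2 per bin
mean_i = 100.0 * np.exp(-2.0 * 15.0 * stol_sq)   # synthetic intensities with B = 15
print(wilson_b(stol_sq, mean_i))                 # ~15.0
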