def run(show_plots,args):
  """Command-line driver for samosa/levmar merging.

  Parses phil parameters from *args*, validates them, and runs
  execute_case (plot flag from *show_plots*).  When
  levmar.compute_cc_half is set, it instead runs three passes with
  half_data_flag = 1, 2, 0 (presumably the two half-datasets then the
  full set -- TODO confirm), writes one MTZ of fitted intensities per
  pass, and finishes with a correlation computation via
  xfel.cxi.cxi_cc.run_cc.  Otherwise a single execute_case is run.
  """
  from xfel.command_line.cxi_merge import master_phil
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application,samosa
  application(work_params)
  samosa(work_params)
  if ("--help" in args) :
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return

  datadir = "."
  written_files = []
  if work_params.levmar.compute_cc_half:
    for half_data_flag in [1,2,0]:
      case = execute_case(datadir, work_params, plot=show_plots, half_data_flag=half_data_flag)
      # one fitted intensity (and stddev) per ordered/reference miller index
      assert len(case.Fit_I)==len(case.ordered_intensities.indices())==len(case.reference_millers.indices())
      model_subset = case.reference_millers[0:len(case.Fit_I)]
      fitted_miller_array = miller.array (miller_set = model_subset,
                                          data = case.Fit_I,
                                          sigmas = case.Fit_I_stddev)
      fitted_miller_array.set_observation_type_xray_intensity()
      # keep only reflections actually visited by the levmar fit
      output_result = fitted_miller_array.select(case.I_visited==1)
      outfile = "%s_s%1d_levmar.mtz"%(work_params.output.prefix,half_data_flag)
      output_result.show_summary(prefix="%s: "%outfile)
      mtz_out = output_result.as_mtz_dataset(column_root_label="Iobs",title=outfile,wavelength=None)
      mtz_obj = mtz_out.mtz_object()
      mtz_obj.write(outfile)
      written_files.append(outfile)
      print "OK s%1d"%half_data_flag
      #raw_input("OK?")

    """Guest code to retrieve the modified orientations after rotational fitting is done"""
    if "Rxy" in work_params.levmar.parameter_flags:
      # Dump the Rx*Ry-corrected A matrices (one line per image file).
      # Uses `case` left over from the last loop iteration above.
      all_A = [e.crystal.get_A() for e in case.experiments.get_experiments()]
      all_files = case.experiments.get_files()
      all_x = case.Fit["Ax"]
      all_y = case.Fit["Ay"]
      from scitbx import matrix
      x_axis = matrix.col((1.,0.,0.))
      y_axis = matrix.col((0.,1.,0.))
      out = open("aaaaa","w")  # NOTE(review): placeholder file name; handle is never closed
      for x in xrange(len(all_A)):
        Rx = x_axis.axis_and_angle_as_r3_rotation_matrix(angle=all_x[x], deg=True)
        Ry = y_axis.axis_and_angle_as_r3_rotation_matrix(angle=all_y[x], deg=True)
        modified_A = Rx * Ry * all_A[x]
        filename = all_files[x]
        print >>out, filename, " ".join([str(a) for a in modified_A.elems])

    # force the levmar scaling algorithm before running the CC analysis
    work_params.scaling.algorithm="levmar"
    from xfel.cxi.cxi_cc import run_cc
    run_cc(work_params,work_params.model_reindex_op,sys.stdout)
  else:
    execute_case(datadir, work_params, plot=show_plots)
def run(show_plots,args):
  """Command-line driver for samosa/levmar merging (CC1/2 variant).

  Same flow as the sibling driver: parse phil parameters, validate, and
  when levmar.compute_cc_half is set run three execute_case passes with
  half_data_flag = 1, 2, 0 (presumably the two half-datasets then the
  full set -- TODO confirm), write an MTZ per pass, then run
  xfel.cxi.cxi_cc.run_cc.

  NOTE(review): unlike the sibling definition there is no else-branch,
  so when compute_cc_half is False this function performs no merging
  after validation -- confirm this is intended.
  """
  from xfel.command_line.cxi_merge import master_phil
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application,samosa
  application(work_params)
  samosa(work_params)
  if ("--help" in args) :
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return

  datadir = "."
  written_files = []
  if work_params.levmar.compute_cc_half:
    for half_data_flag in [1,2,0]:
      case = execute_case(datadir, work_params, plot=show_plots, half_data_flag=half_data_flag)
      # one fitted intensity (and stddev) per ordered/reference miller index
      assert len(case.Fit_I)==len(case.ordered_intensities.indices())==len(case.reference_millers.indices())
      model_subset = case.reference_millers[0:len(case.Fit_I)]
      fitted_miller_array = miller.array (miller_set = model_subset,
                                          data = case.Fit_I,
                                          sigmas = case.Fit_I_stddev)
      fitted_miller_array.set_observation_type_xray_intensity()
      # keep only reflections actually visited by the levmar fit
      output_result = fitted_miller_array.select(case.I_visited==1)
      outfile = "%s_s%1d_levmar.mtz"%(work_params.output.prefix,half_data_flag)
      output_result.show_summary(prefix="%s: "%outfile)
      mtz_out = output_result.as_mtz_dataset(column_root_label="Iobs",title=outfile,wavelength=None)
      mtz_obj = mtz_out.mtz_object()
      mtz_obj.write(outfile)
      written_files.append(outfile)
      print "OK s%1d"%half_data_flag
      #raw_input("OK?")

    """Guest code to retrieve the modified orientations after rotational fitting is done"""
    if "Rxy" in work_params.levmar.parameter_flags:
      # Dump the Rx*Ry-corrected A matrices (one line per image file).
      # Uses `case` left over from the last loop iteration above.
      all_A = [e.crystal.get_A() for e in case.experiments.get_experiments()]
      all_files = case.experiments.get_files()
      all_x = case.Fit["Ax"]
      all_y = case.Fit["Ay"]
      from scitbx import matrix
      x_axis = matrix.col((1.,0.,0.))
      y_axis = matrix.col((0.,1.,0.))
      out = open("aaaaa","w")  # NOTE(review): placeholder file name; handle is never closed
      for x in xrange(len(all_A)):
        Rx = x_axis.axis_and_angle_as_r3_rotation_matrix(angle=all_x[x], deg=True)
        Ry = y_axis.axis_and_angle_as_r3_rotation_matrix(angle=all_y[x], deg=True)
        modified_A = Rx * Ry * all_A[x]
        filename = all_files[x]
        print >>out, filename, " ".join([str(a) for a in modified_A.elems])

    # force the levmar scaling algorithm before running the CC analysis
    work_params.scaling.algorithm="levmar"
    from xfel.cxi.cxi_cc import run_cc
    run_cc(work_params,work_params.model_reindex_op,sys.stdout)
def run(args):
  """Full merging driver built on scaling_manager.

  Parses/validates phil parameters, optionally computes an I-model from
  a reference structure, scales all frame files (with an optional second
  pass after rescaling to the average unit cell), writes merged MTZ and
  loggraph statistics, and returns a scaling_result (or None if no
  frames were accepted).
  """
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application, samosa
  application(work_params)
  samosa(work_params)
  if ("--help" in args):
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or (work_params.data is None)):
    command_name = os.environ["LIBTBX_DISPATCHER_NAME"]
    raise Usage(command_name + " "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify " +
      "set_average_unit_cell=True.")
  if work_params.raw_data.sdfac_auto and work_params.raw_data.sdfac_refine:
    raise Usage("Cannot specify both sdfac_auto and sdfac_refine")

  # Read Nat's reference model from an MTZ file. XXX The observation
  # type is given as F, not I--should they be squared? Check with Nat!
  log = open("%s.log" % work_params.output.prefix, "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "I model"
  if work_params.model is not None:
    # derive target cell/space group from the reference model
    from xfel.merging.general_fcalc import run
    i_model = run(work_params)
    work_params.target_unit_cell = i_model.unit_cell()
    work_params.target_space_group = i_model.space_group_info()
    i_model.show_summary()
  else:
    i_model = None

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  miller_set, i_model = consistent_set_and_model(work_params, i_model)

  frame_files = get_observations(work_params)
  scaler = scaling_manager(miller_set=miller_set,
                           i_model=i_model,
                           params=work_params,
                           log=out)
  scaler.scale_all(frame_files)
  if scaler.n_accepted == 0:
    return None
  scaler.show_unit_cell_histograms()
  if (work_params.rescale_with_average_cell):
    # second scaling pass with the averaged cell lengths (angles kept
    # from the original target cell)
    average_cell_abc = scaler.uc_values.get_average_cell_dimensions()
    average_cell = uctbx.unit_cell(
      list(average_cell_abc) +
      list(work_params.target_unit_cell.parameters()[3:]))
    work_params.target_unit_cell = average_cell
    print >> out, ""
    print >> out, "#" * 80
    print >> out, "RESCALING WITH NEW TARGET CELL"
    print >> out, " average cell: %g %g %g %g %g %g" % \
      work_params.target_unit_cell.parameters()
    print >> out, ""
    scaler.reset()
    scaler.scale_all(frame_files)
    scaler.show_unit_cell_histograms()
  if False: #(work_params.output.show_plots) :
    try:
      plot_overall_completeness(completeness)
    except Exception as e:
      print "ERROR: can't show plots"
      print " %s" % str(e)

  print >> out, "\n"

  # Sum the observations of I and I/sig(I) for each reflection.
  sum_I = flex.double(miller_set.size(), 0.)
  sum_I_SIGI = flex.double(miller_set.size(), 0.)
  for i in xrange(miller_set.size()):
    index = miller_set.indices()[i]
    if index in scaler.ISIGI:
      for t in scaler.ISIGI[index]:
        sum_I[i] += t[0]
        sum_I_SIGI[i] += t[1]

  miller_set_avg = miller_set.customized_copy(
    unit_cell=work_params.target_unit_cell)
  table1 = show_overall_observations(obs=miller_set_avg,
                                     redundancy=scaler.completeness,
                                     summed_wt_I=scaler.summed_wt_I,
                                     summed_weight=scaler.summed_weight,
                                     ISIGI=scaler.ISIGI,
                                     n_bins=work_params.output.n_bins,
                                     title="Statistics for all reflections",
                                     out=out,
                                     work_params=work_params)
  print >> out, ""
  # n_refl counts bins with any observations; corr is fixed at 0 here
  n_refl, corr = ((scaler.completeness > 0).count(True), 0)
  print >> out, "\n"
  table2 = show_overall_observations(
    obs=miller_set_avg,
    redundancy=scaler.summed_N,
    summed_wt_I=scaler.summed_wt_I,
    summed_weight=scaler.summed_weight,
    ISIGI=scaler.ISIGI,
    n_bins=work_params.output.n_bins,
    title="Statistics for reflections where I > 0",
    out=out,
    work_params=work_params)
  #from libtbx import easy_pickle
  #easy_pickle.dump(file_name="stats.pickle", obj=stats)
  #stats.report(plot=work_params.plot)
  #miller_counts = miller_set_p1.array(data=stats.counts.as_double()).select(
  # stats.counts != 0)
  #miller_counts.as_mtz_dataset(column_root_label="NOBS").mtz_object().write(
  # file_name="nobs.mtz")
  if work_params.data_subsubsets.subsubset is not None and work_params.data_subsubsets.subsubset_total is not None:
    # persist this subsubset's scaler for later recombination
    easy_pickle.dump(
      "scaler_%d.pickle" % work_params.data_subsubsets.subsubset, scaler)
  print >> out, ""
  mtz_file, miller_array = scaler.finalize_and_save_data()
  #table_pickle_file = "%s_graphs.pkl" % work_params.output.prefix
  #easy_pickle.dump(table_pickle_file, [table1, table2])
  loggraph_file = os.path.abspath("%s_graphs.log" % work_params.output.prefix)
  f = open(loggraph_file, "w")
  f.write(table1.format_loggraph())
  f.write("\n")
  f.write(table2.format_loggraph())
  f.close()
  result = scaling_result(miller_array=miller_array,
                          plots=scaler.get_plot_statistics(),
                          mtz_file=mtz_file,
                          loggraph_file=loggraph_file,
                          obs_table=table1,
                          all_obs_table=table2,
                          n_reflections=n_refl,
                          overall_correlation=corr)
  easy_pickle.dump("%s.pkl" % work_params.output.prefix, result)
  return result
def find_merge_common_images(args):
  """Read scaled observations, filter frames by I/sigma and resolution,
  and write per-frame diagnostics (intensity histogram, per-frame
  scaled intensities, k/B statistics).

  Frames are kept when mean(I/sig) > 6.0 and the frame reaches better
  than 3.2 A resolution.  Writes i_hist.dat, frame-s-i-NNNNN.dat and
  kb.dat to the working directory.
  """
  phil = iotbx.phil.process_command_line(args = args,
                                         master_string = master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args) :
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and work_params.scaling.algorithm != "mark1")) :
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")

  # Read Nat's reference model from an MTZ file. XXX The observation
  # type is given as F, not I--should they be squared? Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  uc = work_params.target_unit_cell

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  print 'Miller set size: %d' % len(miller_set.indices())

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()

  # flat per-observation columns from the scaler
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]

  # construct table of start / end indices for frames: now using Python
  # range indexing
  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  def nint(a):
    return int(round(a))

  from collections import defaultdict
  i_scale = 0.1
  i_hist = defaultdict(int)

  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    # histogram intensities binned at 1/i_scale granularity
    for i in intensi[s:e]:
      i_hist[nint(i_scale * i)] += 1
    # mean I/sig(I) for the frame (genexp `i`,`s` shadow outer names
    # only inside the generator expression)
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    # keep strong frames that diffract to better than 3.2 A
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  fout = open('i_hist.dat', 'w')
  for i in i_hist:
    fout.write('%.2f %d\n' % (i / i_scale, i_hist[i]))
  fout.close()

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  frames = []

  odd = 0
  even = 0

  for s, e in zip(starts, ends):
    # parity tally of h+k+l over the kept observations
    for x in range(s, e):
      hkl = lookup[hkl_asu[x]]
      if (hkl[0] + hkl[1] + hkl[2]) % 2 == 1:
        odd += 1
      else:
        even += 1
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]

    frames.append(Frame(uc, indices, intensities, sigmas))

  # pre-scale the data - first determine average ln(k), B; then apply
  kbs = [f.kb() for f in frames]

  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

  n_lt_500 = 0
  n_gt_500 = 0

  for j, f in enumerate(frames):
    s_i = f.scale_to_kb(mn_k, mn_B)
    fout = open('frame-s-i-%05d.dat' % j, 'w')
    for s, i, si in s_i:
      fout.write('%f %f %f\n' % (s, i, si))
      if i < 500:
        n_lt_500 += 1
      else:
        n_gt_500 += 1
    fout.close()

  # redundant re-import; harmless, kept as-is
  from collections import defaultdict
  hist = defaultdict(int)

  fout = open('kb.dat', 'w')
  for j, f in enumerate(frames):
    kb = f.kb()
    fout.write('%4d %6.3f %6.3f\n' % (j, kb[0], kb[1]))
    hist[int(round(kb[1]))] += 1
  fout.close()

  for b in sorted(hist):
    print b, hist[b]

  print odd, even
  print n_lt_500, n_gt_500

  return
def run(args):
  """X-scaling driver: validate parameters and the external isomorphous
  reference MTZ, build the target miller set explicitly, scale all
  frames with xscaling_manager, and optionally rescale against the
  average unit cell.  Returns None if no frames were accepted;
  otherwise falls through after the (disabled) plotting section.
  """
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application
  application(work_params)
  if ("--help" in args) :
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and work_params.scaling.algorithm != "mark1")) :
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  if work_params.raw_data.sdfac_auto and work_params.raw_data.sdfac_refine:
    raise Usage("Cannot specify both sdfac_auto and sdfac_refine")

  log = open("%s_%s.log" % (work_params.output.prefix,work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  # Verify that the externally supplied isomorphous reference, if
  # present, defines a suitable column of intensities, and exit with
  # error if it does not. Then warn if it is necessary to generate
  # Bijvoet mates. Failure to catch these issues here would lead to
  # possibly obscure problems in cxi/cxi_cc.py later on.
  try:
    data_SR = mtz.object(work_params.scaling.mtz_file)
  except RuntimeError:
    # no/unreadable reference MTZ: silently skip the verification
    pass
  else:
    array_SR = None
    obs_labels = []
    for array in data_SR.as_miller_arrays():
      this_label = array.info().label_string().lower()
      if array.observation_type() is not None:
        obs_labels.append(this_label.split(',')[0])
      # accept the first column labelled fobs/imean/mtz_column_F
      if this_label.find('fobs')>=0:
        array_SR = array.as_intensity_array()
        break
      if this_label.find('imean')>=0:
        array_SR = array.as_intensity_array()
        break
      if this_label.find(work_params.scaling.mtz_column_F)==0:
        array_SR = array.as_intensity_array()
        break

    if array_SR is None:
      known_labels = ['fobs', 'imean', work_params.scaling.mtz_column_F]
      raise Usage(work_params.scaling.mtz_file +
                  " does not contain any observations labelled [" +
                  ", ".join(known_labels) +
                  "]. Please set scaling.mtz_column_F to one of [" +
                  ",".join(obs_labels) + "].")
    elif not work_params.merge_anomalous and not array_SR.anomalous_flag():
      print >> out, "Warning: Preserving anomalous contributors, but %s " \
        "has anomalous contributors merged. Generating identical Bijvoet " \
        "mates." % work_params.scaling.mtz_file

  # Read Nat's reference model from an MTZ file. XXX The observation
  # type is given as F, not I--should they be squared? Check with Nat!
  print >> out, "I model"
  if work_params.model is not None:
    # derive target cell/space group from the reference model
    from xfel.merging.general_fcalc import run
    i_model = run(work_params)
    work_params.target_unit_cell = i_model.unit_cell()
    work_params.target_space_group = i_model.space_group_info()
    i_model.show_summary()
  else:
    i_model = None

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  # d_min is slightly extended by the cell-length tolerance so edge
  # reflections survive the later change of basis
  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_max=work_params.d_max,
      d_min=work_params.d_min / math.pow(
        1 + work_params.unit_cell_length_tolerance, 1 / 3))
  miller_set = miller_set.change_basis(
    work_params.model_reindex_op).map_to_asu()

  if i_model is not None:
    # put the miller set in the same order as the i_model
    matches = miller.match_indices(i_model.indices(), miller_set.indices())
    assert not matches.have_singles()
    miller_set = miller_set.select(matches.permutation())

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.scale_all()
  if scaler.n_accepted == 0:
    return None
  # --- End of x scaling
  scaler.uc_values = unit_cell_distribution()
  for icell in xrange(len(scaler.frames["unit_cell"])):
    if scaler.params.model is None:
      scaler.uc_values.add_cell(
        unit_cell=scaler.frames["unit_cell"][icell])
    else:
      # flag cells from frames with poor model correlation as rejected
      scaler.uc_values.add_cell(
        unit_cell=scaler.frames["unit_cell"][icell],
        rejected=(scaler.frames["cc"][icell] < scaler.params.min_corr))

  scaler.show_unit_cell_histograms()
  if (work_params.rescale_with_average_cell) :
    # second scaling pass with the averaged cell lengths (angles kept
    # from the original target cell)
    average_cell_abc = scaler.uc_values.get_average_cell_dimensions()
    average_cell = uctbx.unit_cell(list(average_cell_abc) +
      list(work_params.target_unit_cell.parameters()[3:]))
    work_params.target_unit_cell = average_cell
    print >> out, ""
    print >> out, "#" * 80
    print >> out, "RESCALING WITH NEW TARGET CELL"
    print >> out, " average cell: %g %g %g %g %g %g" % \
      work_params.target_unit_cell.parameters()
    print >> out, ""
    scaler.reset()
    scaler = xscaling_manager(
      miller_set=miller_set,
      i_model=i_model,
      params=work_params,
      log=out)
    scaler.scale_all()
    scaler.uc_values = unit_cell_distribution()
    for icell in xrange(len(scaler.frames["unit_cell"])):
      if scaler.params.model is None:
        scaler.uc_values.add_cell(
          unit_cell=scaler.frames["unit_cell"][icell])
      else:
        scaler.uc_values.add_cell(
          unit_cell=scaler.frames["unit_cell"][icell],
          rejected=(scaler.frames["cc"][icell] < scaler.params.min_corr))
    scaler.show_unit_cell_histograms()
  if False : #(work_params.output.show_plots) :
    try :
      plot_overall_completeness(completeness)
    except Exception, e :
      print "ERROR: can't show plots"
      print " %s" % str(e)
def run(args):
  """X-scaling driver (consistent_set_and_model variant): validate
  parameters and the external isomorphous reference MTZ, build a
  miller set/model pair, scale all frames with xscaling_manager, and
  optionally rescale against the average unit cell.  Returns None if
  no frames were accepted; otherwise falls through after the
  (disabled) plotting section.
  """
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application
  application(work_params)
  if ("--help" in args):
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify " +
      "set_average_unit_cell=True.")
  if work_params.raw_data.sdfac_auto and work_params.raw_data.sdfac_refine:
    raise Usage("Cannot specify both sdfac_auto and sdfac_refine")

  log = open(
    "%s_%s.log" % (work_params.output.prefix, work_params.scaling.algorithm),
    "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  # Verify that the externally supplied isomorphous reference, if
  # present, defines a suitable column of intensities, and exit with
  # error if it does not. Then warn if it is necessary to generate
  # Bijvoet mates. Failure to catch these issues here would lead to
  # possibly obscure problems in cxi/cxi_cc.py later on.
  try:
    data_SR = mtz.object(work_params.scaling.mtz_file)
  except RuntimeError:
    # no/unreadable reference MTZ: silently skip the verification
    pass
  else:
    array_SR = None
    obs_labels = []
    for array in data_SR.as_miller_arrays():
      this_label = array.info().label_string().lower()
      if array.observation_type() is not None:
        obs_labels.append(this_label.split(',')[0])
      # accept the first column labelled fobs/imean/mtz_column_F
      if this_label.find('fobs') >= 0:
        array_SR = array.as_intensity_array()
        break
      if this_label.find('imean') >= 0:
        array_SR = array.as_intensity_array()
        break
      if this_label.find(work_params.scaling.mtz_column_F) == 0:
        array_SR = array.as_intensity_array()
        break

    if array_SR is None:
      known_labels = ['fobs', 'imean', work_params.scaling.mtz_column_F]
      raise Usage(work_params.scaling.mtz_file +
                  " does not contain any observations labelled [" +
                  ", ".join(known_labels) +
                  "]. Please set scaling.mtz_column_F to one of [" +
                  ",".join(obs_labels) + "].")
    elif not work_params.merge_anomalous and not array_SR.anomalous_flag():
      print >> out, "Warning: Preserving anomalous contributors, but %s " \
        "has anomalous contributors merged. Generating identical Bijvoet " \
        "mates." % work_params.scaling.mtz_file

  # Read Nat's reference model from an MTZ file. XXX The observation
  # type is given as F, not I--should they be squared? Check with Nat!
  print >> out, "I model"
  if work_params.model is not None:
    # derive target cell/space group from the reference model
    from xfel.merging.general_fcalc import run
    i_model = run(work_params)
    work_params.target_unit_cell = i_model.unit_cell()
    work_params.target_space_group = i_model.space_group_info()
    i_model.show_summary()
  else:
    i_model = None

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  miller_set, i_model = consistent_set_and_model(work_params, i_model)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(miller_set=miller_set,
                            i_model=i_model,
                            params=work_params,
                            log=out)
  scaler.scale_all()
  if scaler.n_accepted == 0:
    return None
  # --- End of x scaling
  scaler.uc_values = unit_cell_distribution()
  for icell in xrange(len(scaler.frames["unit_cell"])):
    if scaler.params.model is None:
      scaler.uc_values.add_cell(
        unit_cell=scaler.frames["unit_cell"][icell])
    else:
      # flag cells from frames with poor model correlation as rejected
      scaler.uc_values.add_cell(
        unit_cell=scaler.frames["unit_cell"][icell],
        rejected=(scaler.frames["cc"][icell] < scaler.params.min_corr))

  scaler.show_unit_cell_histograms()
  if (work_params.rescale_with_average_cell):
    # second scaling pass with the averaged cell lengths (angles kept
    # from the original target cell)
    average_cell_abc = scaler.uc_values.get_average_cell_dimensions()
    average_cell = uctbx.unit_cell(
      list(average_cell_abc) +
      list(work_params.target_unit_cell.parameters()[3:]))
    work_params.target_unit_cell = average_cell
    print >> out, ""
    print >> out, "#" * 80
    print >> out, "RESCALING WITH NEW TARGET CELL"
    print >> out, " average cell: %g %g %g %g %g %g" % \
      work_params.target_unit_cell.parameters()
    print >> out, ""
    scaler.reset()
    scaler = xscaling_manager(miller_set=miller_set,
                              i_model=i_model,
                              params=work_params,
                              log=out)
    scaler.scale_all()
    scaler.uc_values = unit_cell_distribution()
    for icell in xrange(len(scaler.frames["unit_cell"])):
      if scaler.params.model is None:
        scaler.uc_values.add_cell(
          unit_cell=scaler.frames["unit_cell"][icell])
      else:
        scaler.uc_values.add_cell(
          unit_cell=scaler.frames["unit_cell"][icell],
          rejected=(scaler.frames["cc"][icell] < scaler.params.min_corr))
    scaler.show_unit_cell_histograms()
  if False: #(work_params.output.show_plots) :
    try:
      plot_overall_completeness(completeness)
    except Exception, e:
      print "ERROR: can't show plots"
      print " %s" % str(e)
def run(args):
  """Alternative-indexing comparison driver.

  Reads scaled observations, builds both the as-merged ASU indices and
  the indices reindexed by the k,-h,l change of basis, filters frames
  by I/sigma and resolution, then correlates each frame against a
  reference MTZ under both indexing choices, writing the results to
  cc_reference.log.
  """
  phil = iotbx.phil.process_command_line(
    args = args,
    master_string = master_phil)
  work_params = phil.work.extract()
  if ("--help" in args) :
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  miller_set = symmetry(
      unit_cell = work_params.target_unit_cell,
      space_group_info = work_params.target_space_group
    ).build_miller_set(
      anomalous_flag = not work_params.merge_anomalous,
      d_min = work_params.d_min)
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set = miller_set,
    i_model = i_model,
    params = work_params)
  scaler.read_all()
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  # Laue-group ops as rational matrices paired with hkl-notation strings
  rational_ops = []
  for symop in pg:
    rational_ops.append((matrix.sqr(symop.r().transpose().as_rational()),
                         symop.r().as_hkl()))

  # miller_set.show_summary()

  uc = work_params.target_unit_cell

  # flat per-observation columns from the scaler
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]

  from cctbx.miller import map_to_asu
  sgtype = miller_set.space_group_info().type()
  aflag = miller_set.anomalous_flag()
  from cctbx.array_family import flex

  # FIXME in here perform the mapping to ASU for both the original and other
  # index as an array-wise manipulation to make things a bunch faster...
  # however this also uses a big chunk of RAM... FIXME also in here use
  # cb_op.apply(indices) to get the indices reindexed...
  original_indices = flex.miller_index()
  for x in xrange(len(scaler.observations["hkl_id"])):
    original_indices.append(lookup[hkl_asu[x]])

  from cctbx.sgtbx import change_of_basis_op

  I23 = change_of_basis_op('k, -h, l')

  other_indices = I23.apply(original_indices)

  map_to_asu(sgtype, aflag, original_indices)
  map_to_asu(sgtype, aflag, other_indices)

  # FIXME would be useful in here to have a less expensive way of finding the
  # symmetry operation which gave the map to the ASU - perhaps best way is to
  # make a new C++ map_to_asu which records this.

  # FIXME in here recover the original frame structure of the data to
  # logical frame objetcs - N.B. the frame will need to be augmented to test
  # alternative indexings

  # construct table of start / end indices for frames: now using Python
  # range indexing
  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  for j, se in enumerate(zip(starts, ends)):
    print 'processing frame %d: %d to %d' % (j, se[0], se[1])
    s, e = se
    # mean I/sig(I) for the frame (genexp `i`,`s` shadow outer names
    # only inside the generator expression)
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    # keep strong frames that diffract to better than 3.2 A
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  # then start running the comparison code

  frames = []

  for s, e in zip(starts, ends):
    # FIXME need this from remap to ASU
    misym = [0 for x in range(s, e)]
    indices = [original_indices[x] for x in range(s, e)]
    other = [other_indices[x] for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]

    frames.append(Frame(uc, indices, other, intensities, sigmas))

  reference = FrameFromReferenceMTZ()

  fout = open('cc_reference.log', 'w')

  for j, f in enumerate(frames):
    # correlation under the as-merged indexing vs the k,-h,l alternative
    _cc = reference.cc(f)
    _oo = reference.cc_other(f)
    print '%d %d %d %d %f %d %f' % (j, starts[j], ends[j], _cc[0], _cc[1],
                                    _oo[0], _oo[1])
    fout.write('%d %d %d %d %f %d %f\n' % (j, starts[j], ends[j],
                                           _cc[0], _cc[1], _oo[0], _oo[1]))

  fout.close()

  return
def run(args):
  """Merging driver (scaling_manager variant, truncated tail).

  Parses/validates phil parameters, optionally computes an I-model from
  a reference structure, scales all frame files, and optionally rescales
  against the average unit cell.

  NOTE(review): this variant ends after the disabled plotting section
  and returns None implicitly -- unlike the sibling driver it writes no
  statistics or MTZ output; confirm this is intended.
  """
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application, samosa
  application(work_params)
  samosa(work_params)
  if ("--help" in args):
    # print the full phil scope and bail out
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or (work_params.data is None)):
    command_name = os.environ["LIBTBX_DISPATCHER_NAME"]
    raise Usage(command_name + " "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify " +
      "set_average_unit_cell=True.")
  if work_params.raw_data.sdfac_auto and work_params.raw_data.sdfac_refine:
    raise Usage("Cannot specify both sdfac_auto and sdfac_refine")

  # Read Nat's reference model from an MTZ file. XXX The observation
  # type is given as F, not I--should they be squared? Check with Nat!
  log = open("%s.log" % work_params.output.prefix, "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "I model"
  if work_params.model is not None:
    # derive target cell/space group from the reference model
    from xfel.merging.general_fcalc import run
    i_model = run(work_params)
    work_params.target_unit_cell = i_model.unit_cell()
    work_params.target_space_group = i_model.space_group_info()
    i_model.show_summary()
  else:
    i_model = None

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  miller_set, i_model = consistent_set_and_model(work_params, i_model)

  frame_files = get_observations(work_params)
  scaler = scaling_manager(miller_set=miller_set,
                           i_model=i_model,
                           params=work_params,
                           log=out)
  scaler.scale_all(frame_files)
  if scaler.n_accepted == 0:
    return None
  scaler.show_unit_cell_histograms()
  if (work_params.rescale_with_average_cell):
    # second scaling pass with the averaged cell lengths (angles kept
    # from the original target cell)
    average_cell_abc = scaler.uc_values.get_average_cell_dimensions()
    average_cell = uctbx.unit_cell(
      list(average_cell_abc) +
      list(work_params.target_unit_cell.parameters()[3:]))
    work_params.target_unit_cell = average_cell
    print >> out, ""
    print >> out, "#" * 80
    print >> out, "RESCALING WITH NEW TARGET CELL"
    print >> out, " average cell: %g %g %g %g %g %g" % \
      work_params.target_unit_cell.parameters()
    print >> out, ""
    scaler.reset()
    scaler.scale_all(frame_files)
    scaler.show_unit_cell_histograms()
  if False: #(work_params.output.show_plots) :
    try:
      plot_overall_completeness(completeness)
    except Exception, e:
      print "ERROR: can't show plots"
      print " %s" % str(e)
def run(args):
  """Command-line driver for x-merging/scaling.

  Parses phil parameters, validates the isomorphous reference MTZ (if any),
  scales all frames with ``xscaling_manager``, then for each data subset
  (odd frames, even frames, all frames) merges, applies error models,
  prints statistics tables and writes mtz/pickle/reflection-table output.

  Fixes relative to the previous revision:
  * ``self.params...`` in the LevMar minimizer branch raised NameError
    (``self`` does not exist in a module-level function) -> ``work_params``.
  * ``col_count1`` could be unbound after the ``while`` loop when
    ``output.n_bins <= 1`` -> initialized to 0.
  * the rejected-fraction loop clobbered the whole flex array with a
    scalar; per-element assignment was clearly intended.

  Returns the last ``scaling_result`` built, or None if no frames were
  accepted.
  """
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application
  application(work_params)
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return
  # d_min and data are always required; a model is required unless the
  # model-free 'mark1' algorithm is selected.
  if ((work_params.d_min is None) or (work_params.data is None) or
      ((work_params.model is None) and work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify " +
      "set_average_unit_cell=True.")
  if work_params.raw_data.sdfac_auto and work_params.raw_data.sdfac_refine:
    raise Usage("Cannot specify both sdfac_auto and sdfac_refine")
  if not work_params.include_negatives_fix_27May2018:
    work_params.include_negatives = False  # use old behavior
  log = open(
    "%s_%s.log" % (work_params.output.prefix, work_params.scaling.algorithm),
    "w")
  # Tee all output to both the log file and stdout.
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)
  # Verify that the externally supplied isomorphous reference, if
  # present, defines a suitable column of intensities, and exit with
  # error if it does not.  Then warn if it is necessary to generate
  # Bijvoet mates.  Failure to catch these issues here would lead to
  # possibly obscure problems in cxi/cxi_cc.py later on.
  try:
    data_SR = mtz.object(work_params.scaling.mtz_file)
  except RuntimeError:
    # No/unreadable reference mtz: silently skip the check (best-effort).
    pass
  else:
    array_SR = None
    obs_labels = []
    for array in data_SR.as_miller_arrays():
      this_label = array.info().label_string().lower()
      if array.observation_type() is not None:
        obs_labels.append(this_label.split(',')[0])
      # Accept the first column matching fobs/imean/the configured label.
      if this_label.find('fobs') >= 0:
        array_SR = array.as_intensity_array()
        break
      if this_label.find('imean') >= 0:
        array_SR = array.as_intensity_array()
        break
      if this_label.find(work_params.scaling.mtz_column_F) == 0:
        array_SR = array.as_intensity_array()
        break
    if array_SR is None:
      known_labels = ['fobs', 'imean', work_params.scaling.mtz_column_F]
      raise Usage(work_params.scaling.mtz_file +
                  " does not contain any observations labelled [" +
                  ", ".join(known_labels) +
                  "]. Please set scaling.mtz_column_F to one of [" +
                  ",".join(obs_labels) + "].")
    elif not work_params.merge_anomalous and not array_SR.anomalous_flag():
      print("Warning: Preserving anomalous contributors, but %s "
            "has anomalous contributors merged. Generating identical Bijvoet "
            "mates." % work_params.scaling.mtz_file, file=out)
  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  print("I model", file=out)
  if work_params.model is not None:
    # Derive the target cell/space group from the reference model.
    from xfel.merging.general_fcalc import run
    i_model = run(work_params)
    work_params.target_unit_cell = i_model.unit_cell()
    work_params.target_space_group = i_model.space_group_info()
    i_model.show_summary()
  else:
    i_model = None
  print("Target unit cell and space group:", file=out)
  print(" ", work_params.target_unit_cell, file=out)
  print(" ", work_params.target_space_group, file=out)
  miller_set, i_model = consistent_set_and_model(work_params, i_model)
  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(miller_set=miller_set,
                            i_model=i_model,
                            params=work_params,
                            log=out)
  scaler.scale_all()
  if scaler.n_accepted == 0:
    return None
  # --- End of x scaling
  # Rebuild the unit-cell distribution from the scaled frames; with no
  # model there is no c.c. cutoff to reject on.
  scaler.uc_values = unit_cell_distribution()
  for icell in range(len(scaler.frames["unit_cell"])):
    if scaler.params.model is None:
      scaler.uc_values.add_cell(
        unit_cell=scaler.frames["unit_cell"][icell])
    else:
      scaler.uc_values.add_cell(
        unit_cell=scaler.frames["unit_cell"][icell],
        rejected=(scaler.frames["cc"][icell] < scaler.params.min_corr))
  scaler.show_unit_cell_histograms()
  if (work_params.rescale_with_average_cell):
    # Second pass: replace the target cell lengths with the averages
    # observed in pass one (angles kept from the original target) and rescale.
    average_cell_abc = scaler.uc_values.get_average_cell_dimensions()
    average_cell = uctbx.unit_cell(
      list(average_cell_abc) +
      list(work_params.target_unit_cell.parameters()[3:]))
    work_params.target_unit_cell = average_cell
    print("", file=out)
    print("#" * 80, file=out)
    print("RESCALING WITH NEW TARGET CELL", file=out)
    print(" average cell: %g %g %g %g %g %g" %
          work_params.target_unit_cell.parameters(), file=out)
    print("", file=out)
    scaler.reset()
    scaler = xscaling_manager(miller_set=miller_set,
                              i_model=i_model,
                              params=work_params,
                              log=out)
    scaler.scale_all()
    scaler.uc_values = unit_cell_distribution()
    for icell in range(len(scaler.frames["unit_cell"])):
      if scaler.params.model is None:
        scaler.uc_values.add_cell(
          unit_cell=scaler.frames["unit_cell"][icell])
      else:
        scaler.uc_values.add_cell(
          unit_cell=scaler.frames["unit_cell"][icell],
          rejected=(scaler.frames["cc"][icell] < scaler.params.min_corr))
    scaler.show_unit_cell_histograms()
  # Plotting is hard-disabled; `completeness` is not defined here, so
  # re-enabling this branch as-is would raise NameError.
  if False: #(work_params.output.show_plots) :
    try:
      plot_overall_completeness(completeness)
    except Exception as e:
      print("ERROR: can't show plots")
      print(" %s" % str(e))
  print("\n", file=out)
  # Merge the odd-numbered frames, the even-numbered frames, then all
  # frames (subset 0), each with its own output prefix.
  reserve_prefix = work_params.output.prefix
  for data_subset in [1, 2, 0]:
    work_params.data_subset = data_subset
    work_params.output.prefix = "%s_s%1d_%s" % (
      reserve_prefix, data_subset, work_params.scaling.algorithm)
    if work_params.data_subset == 0:
      scaler.frames["data_subset"] = flex.bool(
        scaler.frames["frame_id"].size(), True)
    elif work_params.data_subset == 1:
      scaler.frames["data_subset"] = scaler.frames["odd_numbered"]
    elif work_params.data_subset == 2:
      # flex array comparison — elementwise "not odd_numbered".
      scaler.frames["data_subset"] = scaler.frames["odd_numbered"] == False
    # --------- New code ------------------
    # sanity check: the merged ASU index list must line up with the model set.
    for mod, obs in zip(miller_set.indices(),
                        scaler.millers["merged_asu_hkl"]):
      if mod != obs:
        raise Exception(
          "miller index lists inconsistent--check d_min are equal for merge and xmerge scripts"
        )
    """Sum the observations of I and I/sig(I) for each reflection.
    sum_I = flex.double(i_model.size(), 0.)
    sum_I_SIGI = flex.double(i_model.size(), 0.)
    scaler.completeness = flex.int(i_model.size(), 0)
    scaler.summed_N = flex.int(i_model.size(), 0)
    scaler.summed_wt_I = flex.double(i_model.size(), 0.)
    scaler.summed_weight = flex.double(i_model.size(), 0.)
    scaler.n_rejected = flex.double(scaler.frames["frame_id"].size(), 0.)
    scaler.n_obs = flex.double(scaler.frames["frame_id"].size(), 0.)
    scaler.d_min_values = flex.double(scaler.frames["frame_id"].size(), 0.)
    scaler.ISIGI = {}"""
    from xfel import scaling_results, get_scaling_results, get_isigi_dict
    results = scaling_results(scaler._observations, scaler._frames,
                              scaler.millers["merged_asu_hkl"],
                              scaler.frames["data_subset"],
                              work_params.include_negatives)
    # Dispatch to the configured merging algorithm (e.g. mark0/mark1).
    getattr(results, work_params.scaling.algorithm)(
      scaler.params.min_corr, scaler.params.target_unit_cell)
    sum_I, sum_I_SIGI, \
      scaler.completeness, scaler.summed_N, \
      scaler.summed_wt_I, scaler.summed_weight, scaler.n_rejected, scaler.n_obs, \
      scaler.d_min_values, hkl_ids, i_sigi_list = get_scaling_results(results)
    scaler.ISIGI = get_isigi_dict(results)
    if work_params.merging.refine_G_Imodel:
      from xfel.cxi.merging.refine import find_scale
      my_find_scale = find_scale(scaler, work_params)
      sum_I, sum_I_SIGI, \
        scaler.completeness, scaler.summed_N, \
        scaler.summed_wt_I, scaler.summed_weight, scaler.n_rejected, \
        scaler.n_obs, scaler.d_min_values, hkl_ids, i_sigi_list \
        = my_find_scale.get_scaling_results(results, scaler)
      scaler.ISIGI = get_isigi_dict(results)
    scaler.wavelength = scaler.frames["wavelength"]
    scaler.corr_values = scaler.frames["cc"]
    scaler.rejected_fractions = flex.double(
      scaler.frames["frame_id"].size(), 0.)
    for irej in range(len(scaler.rejected_fractions)):
      if scaler.n_obs[irej] > 0:
        # Per-frame rejected fraction.  (Previously this assigned a scalar
        # to the whole attribute, destroying the flex array.)
        scaler.rejected_fractions[irej] = (
          scaler.n_rejected[irej] / scaler.n_obs[irej])
    # ---------- End of new code ----------------
    if work_params.raw_data.sdfac_refine or work_params.raw_data.errors_from_sample_residuals:
      if work_params.raw_data.sdfac_refine:
        if work_params.raw_data.error_models.sdfac_refine.minimizer == 'simplex':
          from xfel.merging.algorithms.error_model.sdfac_refine import sdfac_refine as error_modeler
        elif work_params.raw_data.error_models.sdfac_refine.minimizer == 'lbfgs':
          from xfel.merging.algorithms.error_model.sdfac_refine_lbfgs import sdfac_refine_refltable_lbfgs as error_modeler
        # Bug fix: was `self.params...` — `self` is undefined in this
        # module-level function and raised NameError for 'LevMar'.
        elif work_params.raw_data.error_models.sdfac_refine.minimizer == 'LevMar':
          from xfel.merging.algorithms.error_model.sdfac_refine_levmar import sdfac_refine_refltable_levmar as error_modeler
      if work_params.raw_data.errors_from_sample_residuals:
        from xfel.merging.algorithms.error_model.errors_from_residuals import errors_from_residuals as error_modeler
      error_modeler(scaler).adjust_errors()
    if work_params.raw_data.reduced_chi_squared_correction:
      from xfel.merging.algorithms.error_model.reduced_chi_squared import reduced_chi_squared
      reduced_chi_squared(scaler).compute()
    miller_set_avg = miller_set.customized_copy(
      unit_cell=work_params.target_unit_cell)
    table1 = show_overall_observations(
      obs=miller_set_avg,
      redundancy=scaler.completeness,
      redundancy_to_edge=None,
      summed_wt_I=scaler.summed_wt_I,
      summed_weight=scaler.summed_weight,
      ISIGI=scaler.ISIGI,
      n_bins=work_params.output.n_bins,
      title="Statistics for all reflections",
      out=out,
      work_params=work_params)
    if table1 is None:
      raise Exception("table could not be constructed")
    print("", file=out)
    if work_params.scaling.algorithm == 'mark0':
      n_refl, corr = scaler.get_overall_correlation(sum_I)
    else:
      n_refl, corr = ((scaler.completeness > 0).count(True), 0)
    print("\n", file=out)
    table2 = show_overall_observations(
      obs=miller_set_avg,
      redundancy=scaler.summed_N,
      redundancy_to_edge=None,
      summed_wt_I=scaler.summed_wt_I,
      summed_weight=scaler.summed_weight,
      ISIGI=scaler.ISIGI,
      n_bins=work_params.output.n_bins,
      title="Statistics for reflections where I > 0",
      out=out,
      work_params=work_params)
    if table2 is None:
      raise Exception("table could not be constructed")
    print("", file=out)
    mtz_file, miller_array = scaler.finalize_and_save_data()
    loggraph_file = os.path.abspath(
      "%s_graphs.log" % work_params.output.prefix)
    f = open(loggraph_file, "w")
    f.write(table1.format_loggraph())
    f.write("\n")
    f.write(table2.format_loggraph())
    f.close()
    result = scaling_result(miller_array=miller_array,
                            plots=scaler.get_plot_statistics(),
                            mtz_file=mtz_file,
                            loggraph_file=loggraph_file,
                            obs_table=table1,
                            all_obs_table=table2,
                            n_reflections=n_refl,
                            overall_correlation=corr)
    easy_pickle.dump("%s.pkl" % work_params.output.prefix, result)
  work_params.output.prefix = reserve_prefix
  # Output table with number of images contribution reflections per
  # resolution bin.
  from libtbx import table_utils
  miller_set_avg.setup_binner(d_max=100000,
                              d_min=work_params.d_min,
                              n_bins=work_params.output.n_bins)
  table_data = [["Bin", "Resolution Range", "# images", "%accept"]]
  if work_params.model is None:
    appropriate_min_corr = -1.1  # lowest possible c.c.
  else:
    appropriate_min_corr = work_params.min_corr
  n_frames = (scaler.frames['cc'] > appropriate_min_corr).count(True)
  # Find the first low-resolution bin with any contributing frames.
  # col_count1 is initialized so the check below is well-defined even if
  # n_bins <= 1 (previously: potential UnboundLocalError).
  col_count1 = 0
  iselect = 1
  while iselect < work_params.output.n_bins:
    col_count1 = results.count_frames(
      appropriate_min_corr, miller_set_avg.binner().selection(iselect))
    print("colcount1", col_count1)
    if col_count1 > 0:
      break
    iselect += 1
  if col_count1 == 0:
    raise Exception("no reflections in any bins")
  for i_bin in miller_set_avg.binner().range_used():
    col_count = '%8d' % results.count_frames(
      appropriate_min_corr, miller_set_avg.binner().selection(i_bin))
    col_legend = '%-13s' % miller_set_avg.binner().bin_legend(
      i_bin=i_bin,
      show_bin_number=False,
      show_bin_range=False,
      show_d_range=True,
      show_counts=False)
    xpercent = results.count_frames(
      appropriate_min_corr,
      miller_set_avg.binner().selection(i_bin)) / float(n_frames)
    percent = '%5.2f' % (100. * xpercent)
    table_data.append(['%3d' % i_bin, col_legend, col_count, percent])
  table_data.append([""] * len(table_data[0]))
  table_data.append(["All", "", '%8d' % n_frames])
  print(file=out)
  print(table_utils.format(table_data,
                           has_header=1,
                           justify='center',
                           delim=' '), file=out)
  # get a list of all reindexing ops for this dataset
  reindexing_ops = {"h,k,l": 0}
  if work_params.merging.reverse_lookup is not None:
    for key in scaler.reverse_lookup:
      if reindexing_ops.get(scaler.reverse_lookup[key], None) is None:
        reindexing_ops[scaler.reverse_lookup[key]] = 0
      reindexing_ops[scaler.reverse_lookup[key]] += 1
  from xfel.cxi.cxi_cc import run_cc
  for key in reindexing_ops.keys():
    run_cc(work_params, reindexing_op=key, output=out)
  # Persist the per-observation data as a reflection table (converting the
  # legacy dict layout if necessary).
  if isinstance(scaler.ISIGI, dict):
    from xfel.merging import isigi_dict_to_reflection_table
    refls = isigi_dict_to_reflection_table(scaler.miller_set.indices(),
                                           scaler.ISIGI)
  else:
    refls = scaler.ISIGI
  easy_pickle.dump("%s.refl" % work_params.output.prefix, refls)
  return result
def find_merge_common_images(args):
  """Read all scaled observations, partition them into per-image frames,
  filter weak/low-resolution frames, then iteratively merge frames that
  share enough common reflections with a high correlation coefficient.
  Python-2 style (print statements)."""
  phil = iotbx.phil.process_command_line(args = args,
                                         master_string = master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")) :
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  # Tee all output to both the log file and stdout.
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)
  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group
  uc = work_params.target_unit_cell
  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)
  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()
  # Flat per-observation arrays; hkl_asu indexes into `lookup`.
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  # construct table of start / end indices for frames: now using Python
  # range indexing
  starts = [0]
  ends = []
  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)
  ends.append(len(scaler.observations["hkl_id"]))
  # Keep only frames with mean I/sig(I) > 6 and resolution beyond 3.2 A.
  keep_start = []
  keep_end = []
  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    # NOTE(review): the generator re-binds `i` and `s` locally, shadowing
    # the slice start — intentional-looking but fragile.
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)
  starts = keep_start
  ends = keep_end
  print 'Keeping %d frames' % len(starts)
  # Wrap each surviving slice in a Frame object (class defined elsewhere).
  frames = []
  for s, e in zip(starts, ends):
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, intensities, sigmas))
  cycle = 0
  total_nref = sum([len(f.get_indices()) for f in frames])
  # pre-scale the data - first determine average ln(k), B; then apply
  kbs = [f.kb() for f in frames]
  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)
  for f in frames:
    f.scale_to_kb(mn_k, mn_B)
  # Iteratively merge frame pairs until no acceptable pair remains.
  while True:
    print 'Analysing %d frames' % len(frames)
    print 'Cycle %d' % cycle
    cycle += 1
    print 'Power spectrum'
    fn = frame_numbers(frames)
    for j in sorted(fn):
      print '%4d %4d' % (j, fn[j])
    nref_cycle = sum([len(f.get_indices()) for f in frames])
    assert(nref_cycle == total_nref)
    # Count reflections shared between every pair of frames.
    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype = numpy.short)
    obs = { }
    from cctbx.sgtbx import rt_mx, change_of_basis_op
    oh = change_of_basis_op(rt_mx('h,l,k'))
    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(j)
    # work through unique observations ignoring those which include no
    # hand information
    for hkl in obs:
      if hkl == oh.apply(hkl):
        continue
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          common_reflections[(f1, f2)] += 1
    # Candidate pairs: > 20 common reflections, strongest overlap first.
    cmn_rfl_list = []
    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if common_reflections[(f1, f2)] > 20:
          cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))
    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()
    joins = []
    used = []
    for n, f1, f2 in cmn_rfl_list:
      if f1 in used or f2 in used:
        continue
      _cc = frames[f1].cc(frames[f2])
      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascase correctly.
      # p-value very small for cc > 0.75 for > 20 observations - necessary
      # as will be correlated due to Wilson curves
      if _cc[0] > 20 and _cc[1] > 0.75:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        # used.append(f1)
        used.append(f2)
    if not joins:
      print 'No pairs found'
      break
    joins.sort()
    joins.reverse()
    for j2, j1 in joins:
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)
    continue
  # Report the largest merged frames (most contributing images).
  frames.sort()
  print 'Biggest few: #frames; #unique refl'
  j = -1
  while frames[j].get_frames() > 1:
    print frames[j].get_frames(), frames[j].get_unique_indices()
    j -= 1
  return
def run(args):
  """Frame-merging driver that also tests an alternative indexing
  (change of basis 'k, -h, l'): merges frames first on their original ASU
  indices, then on the alternative indexing (reindexing frames as needed),
  and finally writes scalepack files for the biggest merged groups.
  Python-2 style (print statements)."""
  phil = iotbx.phil.process_command_line(
    args = args,
    master_string = master_phil)
  work_params = phil.work.extract()
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
                "set_average_unit_cell=True.")
  miller_set = symmetry(
      unit_cell = work_params.target_unit_cell,
      space_group_info = work_params.target_space_group
    ).build_miller_set(
      anomalous_flag = not work_params.merge_anomalous,
      d_min = work_params.d_min)
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)
  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set = miller_set,
    i_model = i_model,
    params = work_params)
  scaler.read_all()
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  # Rational rotation parts of the Laue-group symops (computed but only
  # used locally here).
  rational_ops = []
  for symop in pg:
    rational_ops.append((matrix.sqr(symop.r().transpose().as_rational()),
                         symop.r().as_hkl()))
  # miller_set.show_summary()
  uc = work_params.target_unit_cell
  # Flat per-observation arrays; hkl_asu indexes into `lookup`.
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]
  from cctbx.miller import map_to_asu
  sgtype = miller_set.space_group_info().type()
  aflag = miller_set.anomalous_flag()
  from cctbx.array_family import flex
  # FIXME in here perform the mapping to ASU for both the original and other
  # index as an array-wise manipulation to make things a bunch faster...
  # however this also uses a big chunk of RAM... FIXME also in here use
  # cb_op.apply(indices) to get the indices reindexed...
  original_indices = flex.miller_index()
  for x in xrange(len(scaler.observations["hkl_id"])):
    original_indices.append(lookup[hkl_asu[x]])
  from cctbx.sgtbx import change_of_basis_op
  # Alternative indexing to test against the original.
  I23 = change_of_basis_op('k, -h, l')
  other_indices = I23.apply(original_indices)
  map_to_asu(sgtype, aflag, original_indices)
  map_to_asu(sgtype, aflag, other_indices)
  # FIXME would be useful in here to have a less expensive way of finding the
  # symmetry operation which gave the map to the ASU - perhaps best way is to
  # make a new C++ map_to_asu which records this.
  # FIXME in here recover the original frame structure of the data to
  # logical frame objetcs - N.B. the frame will need to be augmented to test
  # alternative indexings
  # construct table of start / end indices for frames: now using Python
  # range indexing
  starts = [0]
  ends = []
  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)
  ends.append(len(scaler.observations["hkl_id"]))
  # Keep only frames with mean I/sig(I) > 6 and resolution beyond 3.2 A.
  keep_start = []
  keep_end = []
  for j, se in enumerate(zip(starts, ends)):
    print 'processing frame %d: %d to %d' % (j, se[0], se[1])
    s, e = se
    # NOTE(review): the generator re-binds `i` and `s` locally, shadowing
    # the slice start — intentional-looking but fragile.
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)
  starts = keep_start
  ends = keep_end
  print 'Keeping %d frames' % len(starts)
  # then start running the comparison code
  frames = []
  for s, e in zip(starts, ends):
    # FIXME need this from remap to ASU
    misym = [0 for x in range(s, e)]
    indices = [original_indices[x] for x in range(s, e)]
    other = [other_indices[x] for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, other, intensities, sigmas))
  cycle = 0
  total_nref = sum([len(f.get_indices()) for f in frames])
  # pre-scale the data - first determine average ln(k), B; then apply
  kbs = [f.kb() for f in frames]
  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)
  for f in frames:
    f.scale_to_kb(mn_k, mn_B)
  # Iteratively merge: first pairs agreeing on the original indexing,
  # then pairs agreeing on the alternative indexing (reindex before merge).
  while True:
    print 'Analysing %d frames' % len(frames)
    print 'Cycle %d' % cycle
    cycle += 1
    print 'Power spectrum'
    fn = frame_numbers(frames)
    for j in sorted(fn):
      print '%4d %4d' % (j, fn[j])
    nref_cycle = sum([len(f.get_indices()) for f in frames])
    assert(nref_cycle == total_nref)
    # first work on the original indices
    import numpy
    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype = numpy.short)
    obs = { }
    # for other hand add -j
    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(j)
    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          # Same sign == same (original) indexing convention.
          if f1 * f2 > 0:
            common_reflections[(abs(f1), abs(f2))] += 1
    cmn_rfl_list = []
    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if common_reflections[(f1, f2)] > 10:
          cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))
    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()
    joins = []
    used = []
    for n, f1, f2 in cmn_rfl_list:
      if f1 in used or f2 in used:
        continue
      _cc = frames[f1].cc(frames[f2])
      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascase correctly.
      # p-value small (3% ish) for cc > 0.6 for > 10 observations -
      # necessary as will be correlated due to Wilson curves though
      # with B factor < 10 this is less of an issue
      if _cc[0] > 10 and _cc[1] > 0.6:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        used.append(f2)
    if not joins:
      print 'No pairs found'
      break
    joins.sort()
    joins.reverse()
    for j2, j1 in joins:
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)
    all_joins = [j for j in joins]
    # then do the same for the alternative indices
    other_reflections = numpy.zeros((len(frames), len(frames)),
                                    dtype = numpy.short)
    obs = { }
    # for other hand add -j
    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(j)
      indices = set(f.get_other())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(-j)
    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          # Opposite signs == one frame on the alternative indexing.
          if f1 * f2 < 0:
            other_reflections[(abs(f1), abs(f2))] += 1
    oth_rfl_list = []
    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if other_reflections[(f1, f2)] > 10:
          oth_rfl_list.append((other_reflections[(f1, f2)], f1, f2))
    joins = []
    oth_rfl_list.sort()
    oth_rfl_list.reverse()
    for n, f1, f2 in oth_rfl_list:
      if f1 in used or f2 in used:
        continue
      _cc = frames[f1].cc_other(frames[f2])
      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascase correctly.
      # p-value small (3% ish) for cc > 0.6 for > 10 observations -
      # necessary as will be correlated due to Wilson curves though
      # with B factor < 10 this is less of an issue
      if _cc[0] > 10 and _cc[1] > 0.6:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        used.append(f2)
    all_joins += joins
    if not all_joins:
      break
    joins.sort()
    joins.reverse()
    for j2, j1 in joins:
      # Swap the frame onto the alternative indexing before merging.
      frames[j2].reindex()
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)
    continue
  # Report and export the largest merged frames as scalepack files.
  frames.sort()
  print 'Biggest few: #frames; #unique refl'
  j = -1
  while frames[j].get_frames() > 1:
    print frames[j].get_frames(), frames[j].get_unique_indices()
    frames[j].output_as_scalepack(sg, 'scalepack-%d.sca' % j)
    j -= 1
  return
def find_merge_common_images(args):
  """Variant of the common-image analysis: build per-image frames as in the
  merge driver, then write per-frame hand-pair counts to 'common_oh.dat'
  and print a histogram of those counts.  Python-2 style."""
  phil = iotbx.phil.process_command_line(args = args,
                                         master_string = master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")) :
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  # Tee all output to both the log file and stdout.
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)
  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group
  uc = work_params.target_unit_cell
  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)
  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()
  # Flat per-observation arrays; hkl_asu indexes into `lookup`.
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  # construct table of start / end indices for frames: now using Python
  # range indexing
  starts = [0]
  ends = []
  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)
  ends.append(len(scaler.observations["hkl_id"]))
  # Keep only frames with mean I/sig(I) > 6 and resolution beyond 3.2 A.
  keep_start = []
  keep_end = []
  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    # NOTE(review): the generator re-binds `i` and `s` locally, shadowing
    # the slice start — intentional-looking but fragile.
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)
  starts = keep_start
  ends = keep_end
  print 'Keeping %d frames' % len(starts)
  # Wrap each surviving slice in a Frame object (class defined elsewhere).
  frames = []
  for s, e in zip(starts, ends):
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, intensities, sigmas))
  # Tabulate hand-pair counts per frame and histogram them.
  from collections import defaultdict
  hist = defaultdict(int)
  fout = open('common_oh.dat', 'w')
  for j, f in enumerate(frames):
    hp = f.hand_pairs()
    fout.write('%4d %d\n' % (j, hp))
    hist[int(hp)] += 1
  fout.close()
  for b in sorted(hist):
    print b, hist[b]
  return
def run(args):
  """Samosa-validated merge driver: parse phil args, load/derive the
  reference model, build a Miller set slightly beyond d_min, and scale all
  frames (optionally rescaling with the average unit cell).  Python-2 style."""
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  from xfel.merging.phil_validation import application,samosa
  application(work_params)
  samosa(work_params)
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return
  # d_min and data are mandatory; show a usage line naming the dispatcher.
  if ((work_params.d_min is None) or
      (work_params.data is None) ) :
    command_name = os.environ["LIBTBX_DISPATCHER_NAME"]
    raise Usage(command_name + " "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
                "set_average_unit_cell=True.")
  if work_params.raw_data.sdfac_auto and work_params.raw_data.sdfac_refine:
    raise Usage("Cannot specify both sdfac_auto and sdfac_refine")
  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s.log" % work_params.output.prefix, "w")
  # Tee all output to both the log file and stdout.
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)
  print >> out, "I model"
  if work_params.model is not None:
    # Derive the target cell/space group from the reference model.
    from xfel.merging.general_fcalc import run
    i_model = run(work_params)
    work_params.target_unit_cell = i_model.unit_cell()
    work_params.target_space_group = i_model.space_group_info()
    i_model.show_summary()
  else:
    i_model = None
  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group
  # Adjust the minimum d-spacing of the generated Miller set to assure
  # that the desired high-resolution limit is included even if the
  # observed unit cell differs slightly from the target.  If a
  # reference model is present, ensure that Miller indices are ordered
  # identically.
  # NOTE(review): the exponent `1 / 3` is integer division (== 0) under
  # Python 2 unless `from __future__ import division` is in effect at the
  # top of this file (outside this view) — confirm; if it is not, the
  # tolerance has no effect.
  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_max=work_params.d_max,
      d_min=work_params.d_min / math.pow(
        1 + work_params.unit_cell_length_tolerance, 1 / 3))
  miller_set = miller_set.change_basis(
    work_params.model_reindex_op).map_to_asu()
  if i_model is not None:
    # Reorder the generated set to match the model's index ordering.
    matches = miller.match_indices(i_model.indices(), miller_set.indices())
    assert not matches.have_singles()
    miller_set = miller_set.select(matches.permutation())
  frame_files = get_observations(work_params)
  scaler = scaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.scale_all(frame_files)
  if scaler.n_accepted == 0:
    return None
  scaler.show_unit_cell_histograms()
  if (work_params.rescale_with_average_cell) :
    # Second pass: replace the target cell lengths with the averages
    # observed in pass one (angles kept from the original target) and rescale.
    average_cell_abc = scaler.uc_values.get_average_cell_dimensions()
    average_cell = uctbx.unit_cell(
      list(average_cell_abc) +
      list(work_params.target_unit_cell.parameters()[3:]))
    work_params.target_unit_cell = average_cell
    print >> out, ""
    print >> out, "#" * 80
    print >> out, "RESCALING WITH NEW TARGET CELL"
    print >> out, " average cell: %g %g %g %g %g %g" % \
      work_params.target_unit_cell.parameters()
    print >> out, ""
    scaler.reset()
    scaler.scale_all(frame_files)
    scaler.show_unit_cell_histograms()
  # Plotting is hard-disabled; `completeness` is not defined here, so
  # re-enabling this branch as-is would raise NameError.
  if False : #(work_params.output.show_plots) :
    try :
      plot_overall_completeness(completeness)
    except Exception, e :
      print "ERROR: can't show plots"
      print " %s" % str(e)
def find_merge_common_images(args):
  """Greedily merge frames that share well-correlated common reflections.

  Reads previously scaled observations via xscaling_manager, splits them
  into per-frame index ranges, filters frames on <I/sigma> and resolution,
  then repeatedly joins the frame pairs whose common reflections correlate
  well until no qualifying pair remains.

  :param args: list of phil command-line argument strings
  :raises Usage: on missing required parameters
  """
  phil = iotbx.phil.process_command_line(args = args,
                                         master_string = master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return
  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")) :
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  # Tee all output to both a log file and stdout.
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)
  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group
  uc = work_params.target_unit_cell
  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)
  # mark1-style reference: random intensities, used only for bookkeeping.
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)
  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()
  # Flat per-observation arrays; presumably parallel (same length, one
  # entry per observation) -- confirm against xscaling_manager.read_all.
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  # construct table of start / end indices for frames: now using Python
  # range indexing
  starts = [0]
  ends = []
  for x in xrange(1, len(scaler.observations["hkl_id"])):
    # A new frame begins wherever frame_id changes between neighbours;
    # observations are therefore assumed to be grouped by frame.
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)
  ends.append(len(scaler.observations["hkl_id"]))
  # Keep only frames with mean I/sigma > 6 and at least one reflection
  # beyond 3.2 A (hard-coded quality thresholds).
  keep_start = []
  keep_end = []
  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    # NOTE(review): the generator expression re-binds "s" internally
    # (sigma term); the slice bounds use the outer "s" because they are
    # evaluated before iteration, and Python 2 generator scoping leaves
    # the outer "s" intact for the final "(e - s)".
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)
  starts = keep_start
  ends = keep_end
  print 'Keeping %d frames' % len(starts)
  # Wrap each surviving index range in a Frame object for merging.
  frames = []
  for s, e in zip(starts, ends):
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(indices, intensities, sigmas))
  # Agglomerative merge loop: repeat until no frame pair qualifies.
  while True:
    print 'Analysing %d frames' % len(frames)
    # common_reflections[i, j] counts indices shared by frames i < j.
    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype = numpy.short)
    # Invert: map each unique Miller index to the frames observing it.
    obs = { }
    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(j)
    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          common_reflections[(f1, f2)] += 1
    # Rank pairs by number of common reflections, largest first.
    cmn_rfl_list = []
    for f1 in range(len(frames)):
      for f2 in range(f1, len(frames)):
        cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))
    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()
    joins = []
    for n, f1, f2 in cmn_rfl_list:
      # _cc presumably is (number of common reflections, correlation
      # coefficient) -- confirm against Frame.cc; thresholds hard-coded.
      _cc = frames[f1].cc(frames[f2])
      if _cc[0] > 20 and _cc[1] > 0.75:
        # print '===> %d %d' % (n, frames[f1].common(frames[f2]))
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f1, f2))
    if not joins:
      print 'No pairs found'
      break
    # Process joins in descending index order so removing frames[j2]
    # does not shift the indices of joins still to be applied.
    joins.sort()
    joins.reverse()
    for j1, j2 in joins:
      frames[j1].merge(frames[j2])
      frames.remove(frames[j2])
    continue
  return