def scale_data(indices, iobs, scale_ref, parameter, calc_cc):
    k, b, cc = 1, float("nan"), float("nan")

    sortp = yamtbx_utils_ext.sort_permutation_fast_less(indices)
    indices = indices.select(sortp)
    iobs = iobs.select(sortp)

    sel0, sel1 = yamtbx_utils_ext.my_common_indices(scale_ref.indices(), indices)
    #indices = indices.select(sel1)
    iobs_c = iobs.select(sel1)
    ref_c = scale_ref.data().select(sel0)

    if iobs_c.size() < 10 and ref_c.size() < 10:
        return k, b, cc

    if parameter == "k":
        k = flex.sum(ref_c*iobs_c) / flex.sum(flex.pow2(iobs_c))
    elif parameter == "kb":
        from yamtbx.dataproc.scale_data import kBdecider
        kbd = kBdecider(scale_ref,
                        miller.array(scale_ref.customized_copy(indices=indices), data=iobs))
        k, b = kbd.run()
    else:
        raise RuntimeError("Never reaches here")

    if calc_cc:
        corr = flex.linear_correlation(ref_c, iobs_c)
        if corr.is_well_defined(): cc = corr.coefficient()

    return k, b, cc
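# --- Standalone sketch (not part of yamtbx): the math behind the "k" and "kb"
# branches above, written over plain Python lists so it runs without cctbx.
# The least-squares linear scale minimizing sum((ref - k*obs)**2) is
#   k = sum(ref*obs) / sum(obs**2),
# and the "kb" model is ref ~ k * exp(-B*s2) * obs with s2 = d*^2 = 1/d^2,
# which becomes an ordinary linear fit after taking logs of the ratios.
# Function names here are illustrative, not the kBdecider implementation.
import math

def linear_scale(ref, obs):
    return sum(r*o for r, o in zip(ref, obs)) / sum(o*o for o in obs)

def kb_scale(ref, obs, s2):
    # least-squares fit of log(ref/obs) = log(k) - B*s2 (assumes positive data)
    y = [math.log(r/o) for r, o in zip(ref, obs)]
    n = float(len(s2))
    sx, sy = sum(s2), sum(y)
    sxx = sum(v*v for v in s2)
    sxy = sum(a*b for a, b in zip(s2, y))
    B = -(n*sxy - sx*sy) / (n*sxx - sx*sx)  # slope of the fit is -B
    k = math.exp((sy + B*sx) / n)           # intercept of the fit is log(k)
    return k, B

# synthetic check: obs generated so that ref = 0.5*exp(-10*s2)*obs
s2 = [0.01*i for i in range(1, 11)]
ref = [100.0/(1+i) for i in range(10)]
obs = [r/0.5 * math.exp(10*s) for r, s in zip(ref, s2)]
print linear_scale(ref, obs)
print kb_scale(ref, obs, s2)  # should recover roughly (0.5, 10)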
#if "hkl" in mtzfile: # Is[-1][1] = miller.array(miller_set=Is[-1][1], data= Is[-1][1].data() * flex.exp(4.8*Is[-1][1].d_star_sq().data())) # Take common sets Is = commonalize(Is) #### # Decide scale if not params.noscale: for i in xrange(1, len(Is)): I = Is[i][1].resolution_filter(d_max=params.scale.dmax, d_min=params.scale.dmin) I0 = Is[0][1].resolution_filter(d_max=params.scale.dmax, d_min=params.scale.dmin) I, I0 = I.common_sets(I0, assert_is_similar_symmetry=False) if params.scale.bscale: Is[i][2] = kBdecider(I0, I).run() print "Scale for", Is[i][0], "is", Is[i][2] else: scale = flex.sum(I0.data()*I.data()) / flex.sum(flex.pow2(I.data())) Is[i][2] = scale, 0 print "Scale for", Is[i][0], "is", scale print Is[0][1].data().size() # Prepare plot data for_plot = OrderedDict() # {name: [mean, ...], ..} binner = Is[0][1].setup_binner(n_bins=params.nbins)#reflections_per_bin=50) for i_bin in binner.range_used(): for name, I, (scale, b) in Is: dmax, dmin = binner.bin_d_range(i_bin) Isel = I.resolution_filter(d_max=dmax, d_min=dmin)
def run(params, mtzfiles):
    arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)

    if params.take_common:
        arrays = commonalize(arrays)

    maxlen_f = max(map(lambda x: len(x[0]), arrays))

    ref_f_obs = arrays[0][1]

    scales = []
    for f, f_obs, f_model, flag in arrays:
        if ref_f_obs == f_obs: k, B = 1., 0
        else: k, B = kBdecider(ref_f_obs, f_obs).run()
        scales.append((k, B))

    if params.reference != "first":
        if params.reference == "bmin": # scale to the strongest
            kref, bref = max(scales, key=lambda x: x[1])
        elif params.reference == "bmax": # scale to the weakest
            kref, bref = min(scales, key=lambda x: x[1])
        elif params.reference == "bmed": # scale to the median
            perm = range(len(scales))
            perm.sort(key=lambda i: scales[i][1])
            kref, bref = scales[perm[len(perm)//2]]
        else:
            raise RuntimeError("Never reaches here")

        print "# Set K=%.2f B=%.2f as reference" % (kref, bref)
        scales = map(lambda x: (x[0]/kref, x[1]-bref), scales) # not bref-x[1], because negated later

    print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
    for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
        d_star_sq = f_obs.d_star_sq().data()
        scale = k * flex.exp(-B*d_star_sq)

        # Normalized
        #f_obs.setup_binner(auto_binning=True)
        #f_model.setup_binner(auto_binning=True)
        #e_obs, e_model = map(lambda x: x.quasi_normalize_structure_factors(), (f_obs, f_model))
        e_obs = absolute_scaling.kernel_normalisation(f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None),
                                                      auto_kernel=True)
        e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
        e_model = absolute_scaling.kernel_normalisation(f_model.customized_copy(data=f_model.data()*scale, sigmas=None),
                                                        auto_kernel=True)
        e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()

        f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
        f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())
        e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
        e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())

        r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
        r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))
        cc_work_E = calc_cc(e_obs_w, e_model_w, False)
        cc_free_E = calc_cc(e_obs_t, e_model_t, False)
        #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
        #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)

        se = calc_sigmaa(f_obs, f_model, flag)
        sigmaa = flex.mean(se.sigmaa().data())
        fom = flex.mean(se.fom().data())

        print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)
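# --- Standalone sketch (not part of yamtbx): a plausible definition of the
# calc_r() helper used above, whose body is not shown in this excerpt. It is
# assumed here to be the standard crystallographic R-factor
#   R = sum|F_obs - scale*F_model| / sum(F_obs),
# written over plain lists with a per-reflection scale.
def calc_r_sketch(f_obs, f_model, scale):
    num = sum(abs(fo - s*fm) for fo, fm, s in zip(f_obs, f_model, scale))
    den = sum(f_obs)
    return num / den if den != 0 else float("nan")

print calc_r_sketch([10., 8., 6.], [9., 9., 5.], [1., 1., 1.])  # 3/24 = 0.125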
def run(params, xfiles):
    # read reference
    arrays = iotbx.file_reader.any_file(params.reference.file).file_server.miller_arrays
    arrays = filter(lambda ar: ar.is_xray_data_array(), arrays)
    if params.reference.label is not None:
        arrays = filter(lambda ar: ar.info().label_string() == params.reference.label, arrays)

    if len(arrays) != 1:
        print "Can't decide which data to use in reference file:", params.reference.file
        print "Choose a label:"
        for ar in arrays: print ar.info().label_string()
        return

    refdata = arrays[0].as_intensity_array()
    refdata = refdata.resolution_filter(d_max=params.reference.d_max, d_min=params.reference.d_min)

    print "file n.common k b cc.org cc.mean cc.scaled a b c al be ga"

    for xf in xfiles:
        print "# Reading", xf
        try:
            xfile = DenzoXfile(xf)
        except:
            traceback.print_exc()
            continue

        a = xfile.miller_array(anomalous_flag=refdata.anomalous_flag())
        a = a.select(a.sigmas() > 0)
        a = a.resolution_filter(d_min=params.d_min, d_max=params.d_max)
        if params.sigma_cutoff is not None:
            a = a.select(a.data()/a.sigmas() >= params.sigma_cutoff)

        a = a.merge_equivalents(use_internal_variance=False).array()
        tmp, a = refdata.common_sets(a, assert_is_similar_symmetry=False)
        n_common = tmp.size()
        if n_common == 0:
            print "# No useful reflection in this file. Skipping."
            continue

        corr = flex.linear_correlation(tmp.data(), a.data())
        cc_org = corr.coefficient() if corr.is_well_defined() else float("nan")

        # Calculate CC in resolution bins and average
        tmp.setup_binner(auto_binning=True)
        cc_bins = []
        for i_bin in tmp.binner().range_used():
            sel = tmp.binner().selection(i_bin)
            corr = flex.linear_correlation(tmp.select(sel).data(), a.select(sel).data())
            if not corr.is_well_defined(): continue
            cc_bins.append(corr.coefficient())

        cc_mean = sum(cc_bins) / float(len(cc_bins)) if len(cc_bins) > 0 else float("nan")

        # Determine scale and B
        k, b = kBdecider(tmp, a).run()
        bfac = flex.exp(-b * a.d_star_sq().data()) if b != 0 else 1.

        corr = flex.linear_correlation(tmp.data(), a.data() * k*bfac)
        cc_scaled = corr.coefficient() if corr.is_well_defined() else float("nan")

        print "%s %5d %.3e %.3e %.4f %.4f %.4f" % (xf, n_common, k, b, cc_org, cc_mean, cc_scaled),
        print ("%.3f "*6) % a.unit_cell().parameters()

        if params.show_plot:
            import pylab
            from matplotlib.ticker import FuncFormatter
            s3_formatter = lambda x, pos: "inf" if x == 0 else "%.2f" % (x**(-1./3)) # -1/3 would be integer division

            fig, ax1 = pylab.plt.subplots()
            plot_x = map(lambda i: tmp.binner().bin_d_range(i)[1]**(-3), tmp.binner().range_used())

            #for name, ar in (("reference", tmp), ("data", a)):
            vals = map(lambda i: flex.mean(tmp.data().select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="reference")

            scale = flex.sum(tmp.data()*a.data()) / flex.sum(flex.pow2(a.data()))
            print "Linear-scale=", scale
            vals = map(lambda i: scale*flex.mean(a.data().select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data")

            vals = map(lambda i: flex.mean((a.data()*k*bfac).select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled")

            """
            from mmtbx.scaling import absolute_scaling, relative_scaling
            ls_scaling = relative_scaling.ls_rel_scale_driver(tmp,
                                                              tmp.customized_copy(data=a.data(), sigmas=a.sigmas()),
                                                              use_intensities=True,
                                                              scale_weight=True, use_weights=True)
            ls_scaling.show()
            vals = map(lambda i: flex.mean(ls_scaling.derivative.resolution_filter(*tmp.binner().bin_d_range(i)).data()), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled2")
            """

            pylab.legend()
            pylab.xlabel('resolution (d^-3)')
            pylab.ylabel('<I>')
            pylab.setp(pylab.gca().get_legend().get_texts(), fontsize="small")
            pylab.title('Scaled with B-factors (%.2f)' % b)
            pylab.gca().xaxis.set_major_formatter(FuncFormatter(s3_formatter))

            ax2 = ax1.twinx()
            ax2.plot(plot_x, cc_bins, "black")
            ax2.set_ylabel('CC')

            pylab.show()
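# --- Standalone sketch (not part of yamtbx): the binned-CC logic above without
# cctbx. Reflections are grouped into resolution shells and the Pearson
# correlation between reference and data is averaged over the bins where it is
# well defined, mirroring flex.linear_correlation's is_well_defined() check.
def pearson(x, y):
    n = float(len(x))
    mx, my = sum(x)/n, sum(y)/n
    sxy = sum((a-mx)*(b-my) for a, b in zip(x, y))
    sxx = sum((a-mx)**2 for a in x)
    syy = sum((b-my)**2 for b in y)
    if sxx == 0 or syy == 0: return None  # not well defined
    return sxy / (sxx*syy)**0.5

def mean_binned_cc(ref, dat, bin_ids):
    ccs = []
    for b in sorted(set(bin_ids)):
        sel = [i for i, bi in enumerate(bin_ids) if bi == b]
        cc = pearson([ref[i] for i in sel], [dat[i] for i in sel])
        if cc is not None: ccs.append(cc)
    return sum(ccs)/len(ccs) if ccs else float("nan")

print mean_binned_cc([10., 9., 5., 4.], [11., 8., 6., 3.], [0, 0, 1, 1])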
x[1] = x[1].customized_copy(crystal_symmetry=Is[0][1])