def __init__(self, miller_obs, miller_calc, miller_mock,
             n_reso_bins=25, n_e_bins=20, thres=3.0):
    self.miller_obs = miller_obs
    self.miller_calc = miller_calc
    self.miller_mock = miller_mock
    # take a common set to avoid possible problems
    self.miller_calc = self.miller_calc.common_set(self.miller_obs)
    self.miller_mock = self.miller_mock.common_set(self.miller_obs)
    # we need to normalise the data, both fobs and fcalc
    norma_obs_obj = absolute_scaling.kernel_normalisation(
        self.miller_obs, auto_kernel=True)
    norma_calc_obj = absolute_scaling.kernel_normalisation(
        self.miller_calc, auto_kernel=True)
    norma_mock_obj = absolute_scaling.kernel_normalisation(
        self.miller_mock, auto_kernel=True)
    # normalized data (divided by eps)
    self.norma_obs = norma_obs_obj.normalised_miller_dev_eps.f_sq_as_f()
    # as above, for calculated data
    self.norma_calc = norma_calc_obj.normalised_miller_dev_eps.f_sq_as_f()
    # as above, for mock data
    self.norma_mock = norma_mock_obj.normalised_miller_dev_eps.f_sq_as_f()
    # the divisor (no eps)
    self.norma_obs_const = norma_obs_obj.normalizer_for_miller_array
    self.norma_calc_const = norma_calc_obj.normalizer_for_miller_array  # as above
    self.norma_mock_const = norma_mock_obj.normalizer_for_miller_array  # as above
    self.thres = thres
    self.n_reso_bins = n_reso_bins
    self.n_e_bins = n_e_bins
    # first set up a binner please
    self.miller_obs.setup_binner(n_bins=self.n_reso_bins)
    self.miller_calc.use_binner_of(self.miller_obs)
    self.miller_mock.use_binner_of(self.miller_obs)
    self.norma_obs.use_binner_of(self.miller_obs)
    self.norma_calc.use_binner_of(self.miller_calc)
    self.norma_mock.use_binner_of(self.miller_mock)
    self.new_norma_obs = self.norma_obs.deep_copy().set_observation_type(
        self.norma_obs)
    self.new_obs = None
    self.swap_it()
    # we have to denormalize the data now
    self.new_obs = self.norma_obs.customized_copy(
        data=self.new_norma_obs.data()
             * self.new_norma_obs.epsilons().data().as_double()
             * flex.sqrt(self.norma_calc_const),
        sigmas=self.new_norma_obs.sigmas()
               * self.new_norma_obs.epsilons().data().as_double()
               * flex.sqrt(self.norma_calc_const)
    ).set_observation_type(self.miller_obs)
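# A minimal sketch (not from the original source) mirroring the
# denormalisation arithmetic above on plain flex arrays; all numbers
# are illustrative:
from scitbx.array_family import flex

e_values = flex.double([1.2, 0.8])        # normalised amplitudes E
eps = flex.double([1.0, 2.0])             # epsilon factors per reflection
normalizer = flex.double([400.0, 380.0])  # smooth <I> curve (no eps)
f_approx = e_values * eps * flex.sqrt(normalizer)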
def test_kernel_based_normalisation():
    miller_array = random_data(35.0, d_min=2.5)
    normalizer = absolute_scaling.kernel_normalisation(
        miller_array, auto_kernel=True)
    z_values = (normalizer.normalised_miller.data() /
                normalizer.normalised_miller.epsilons().data().as_double())
    z_values = flex.mean(z_values)
    assert approx_equal(1.0, z_values, eps=0.05)
    # This should raise an error rather than enter an infinite loop
    with raises(AssertionError) as e:
        absolute_scaling.kernel_normalisation(
            miller_array[:1].set_observation_type_xray_amplitude(),
            auto_kernel=True)
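# `random_data` is a helper defined elsewhere in the test module; a
# hypothetical stand-in built from cctbx's random-structure tools could
# look like this (space group and atom count are illustrative, and the
# first argument is treated as an overall B factor):
from cctbx.development import random_structure
from cctbx import sgtbx

def random_data(b_add, d_min=2.5):
    xrs = random_structure.xray_structure(
        space_group_info=sgtbx.space_group_info("P 21 21 21"),
        elements=["C"] * 100,
        volume_per_atom=50.0)
    f_calc = xrs.structure_factors(d_min=d_min).f_calc()
    f_calc = f_calc.apply_debye_waller_factors(b_iso=b_add)
    return abs(f_calc).f_as_f_sq().set_observation_type_xray_intensity()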
def read_target_files(target_files, d_min, d_max, normalization, log_out):
    ret = collections.OrderedDict()
    for i, f in enumerate(target_files):
        f = iotbx.file_reader.any_file(f, force_type="hkl",
                                       raise_sorry_if_errors=True)
        arrays = f.file_server.get_miller_arrays(None)
        scores = iotbx.reflection_file_utils.get_xray_data_scores(
            arrays, ignore_all_zeros=True,
            prefer_anomalous=False, prefer_amplitudes=False)
        array = arrays[scores.index(max(scores))]
        log_out.write("# target%.3d = %s %s\n" % (i, array.info(),
                                                  array.d_max_min()))
        if array.anomalous_flag():
            array = array.average_bijvoet_mates()
        array = array.as_intensity_array().resolution_filter(d_max=d_max,
                                                             d_min=d_min)
        if normalization == "E":
            normaliser = kernel_normalisation(array, auto_kernel=True)
            ret[f] = array.customized_copy(
                data=array.data() / normaliser.normalizer_for_miller_array,
                sigmas=array.sigmas() / normaliser.normalizer_for_miller_array
                       if array.sigmas() else None)
        else:
            ret[f] = array
    return ret
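# Hypothetical invocation of the helper above (file names illustrative):
import sys
targets = read_target_files(["target1.mtz", "target2.mtz"],
                            d_min=2.0, d_max=30.0,
                            normalization="E", log_out=sys.stdout)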
def normalise_all(self):
    ## normalise all difference data please
    # assign back by index so the normalised arrays actually replace the
    # originals in self.ano_and_iso (rebinding a bare loop variable would
    # be a no-op, and `set` would shadow the builtin)
    for i, data_set in enumerate(self.ano_and_iso):
        tmp_norm = absolute_scaling.kernel_normalisation(data_set,
                                                         auto_kernel=True)
        self.ano_and_iso[i] = tmp_norm.normalised_miller.deep_copy(
            ).set_observation_type(tmp_norm.normalised_miller)
def __init__(self, lambda1, lambda2, k1, k2, options, out=None):
    self.out = out
    if self.out is None:
        self.out = sys.stdout
    self.options = options
    print("FA estimation", file=self.out)
    print("=============", file=self.out)
    if k1 is None:
        raise Sorry(
            "f\"(w1)/f\"(w2) ratio is not defined. "
            "Please provide f\" values upon input")
    if k2 is None:
        if self.options.protocol == 'algebraic':
            raise Sorry("""
delta f' f\" ratio is not defined.
Either provide f' and f\" values upon input,
or choose a different FA estimation protocol.
""")
    protocol = {'algebraic': False, 'cns': False, 'combine_ano': False}
    protocol[self.options.protocol] = True
    self.fa_values = None

    if protocol['algebraic']:
        print(" Using algebraic approach to estimate FA values ",
              file=self.out)
        print(file=self.out)
        tmp = singh_ramasheshan_fa_estimate(lambda1, lambda2, k1, k2)
        self.fa_values = tmp.fa.f_sq_as_f()
    if protocol['cns']:
        print(" Using CNS approach to estimate FA values ", file=self.out)
        print(file=self.out)
        tmp = cns_fa_driver([lambda1, lambda2])
        self.fa_values = tmp.fa
    if protocol['combine_ano']:
        print(" Combining anomalous data only", file=self.out)
        print(file=self.out)
        tmp = mum_dad(lambda1, lambda2, k1)
        self.fa_values = tmp.dad

    norma = absolute_scaling.kernel_normalisation(self.fa_values,
                                                  auto_kernel=True)
    self.fa_values = norma.normalised_miller.f_sq_as_f()
def kernel_normalisation(intensities):
    """Kernel normalisation of the input intensities.

    Args:
      intensities (cctbx.miller.array): The intensities to be normalised.

    Returns:
      cctbx.miller.array: The normalised intensities.
    """
    normalisation = absolute_scaling.kernel_normalisation(
        intensities, auto_kernel=True)
    return normalisation.normalised_miller.deep_copy().set_info(
        intensities.info())
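# Usage sketch for the wrapper above (assumes `i_obs` is a
# cctbx.miller.array of intensities with an info() block attached):
e_sq = kernel_normalisation(i_obs)  # normalised intensities, mean E^2 ~ 1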
def __init__(self, ano, iso, options, out=None):
    if out is None:
        out = sys.stdout
    ## get stuff
    self.options = options
    self.iso = iso.deep_copy().map_to_asu()
    self.ano = ano.deep_copy().map_to_asu()
    ## get common sets
    self.iso, self.ano = self.iso.common_sets(self.ano)
    ## perform normalisation
    normalizer_iso = absolute_scaling.kernel_normalisation(
        self.iso, auto_kernel=True,
        n_term=options.number_of_terms_in_normalisation_curve)
    normalizer_ano = absolute_scaling.kernel_normalisation(
        self.ano, auto_kernel=True,
        n_term=options.number_of_terms_in_normalisation_curve)
    self.fa = self.iso.customized_copy(
        data=flex.sqrt(
            self.iso.data() * self.iso.data()
            / normalizer_iso.normalizer_for_miller_array
            + self.ano.data() * self.ano.data()
            / normalizer_ano.normalizer_for_miller_array),
        sigmas=flex.sqrt(
            self.iso.sigmas() * self.iso.sigmas()
            / (normalizer_iso.normalizer_for_miller_array
               * normalizer_iso.normalizer_for_miller_array)
            + self.ano.sigmas() * self.ano.sigmas()
            / (normalizer_ano.normalizer_for_miller_array
               * normalizer_ano.normalizer_for_miller_array)))
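# The F_A combination above is sqrt(d_iso^2/S_iso + d_ano^2/S_ano); a
# minimal numeric sketch of that formula on plain flex arrays (all
# values illustrative):
from scitbx.array_family import flex

d_iso = flex.double([1.0, 2.0])   # isomorphous differences
d_ano = flex.double([0.5, 0.3])   # anomalous differences
s_iso = flex.double([2.0, 2.5])   # normaliser for the isomorphous set
s_ano = flex.double([1.5, 1.8])   # normaliser for the anomalous set
fa = flex.sqrt(d_iso * d_iso / s_iso + d_ano * d_ano / s_ano)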
def run(params, target_files):
    assert params.normalization in ("no", "E")
    ofs = open(params.dat_out, "w")
    xac_files = util.read_path_list(params.lstin)
    targets = read_target_files(target_files, params.d_min, params.d_max,
                                params.normalization, ofs)
    cellcon = CellConstraints(targets.values()[0].space_group())

    #for i, t in enumerate(targets): ofs.write("# target%.3d = %s\n" % (i,t))
    ofs.write("# normalization = %s\n" % params.normalization)
    ofs.write("# d_min, d_max = %s, %s\n" % (params.d_min, params.d_max))
    ofs.write("file %s " % cellcon.get_label_for_free_params())
    ofs.write(" ".join(map(lambda x: "cc.%.3d nref.%.3d" % (x, x),
                           xrange(len(targets)))))
    ofs.write("\n")

    for xac_file in xac_files:
        print "reading", xac_file
        xac = xds_ascii.XDS_ASCII(xac_file)
        xac.remove_rejected()
        iobs = xac.i_obs(anomalous_flag=False).merge_equivalents(
            use_internal_variance=False).array()
        ofs.write("%s %s" % (xac_file,
                             cellcon.format_free_params(iobs.unit_cell())))
        fail_flag = False
        if params.normalization == "E":
            try:
                normaliser = kernel_normalisation(iobs, auto_kernel=True)
                iobs = iobs.customized_copy(
                    data=iobs.data() / normaliser.normalizer_for_miller_array,
                    sigmas=iobs.sigmas() / normaliser.normalizer_for_miller_array)
            except:
                fail_flag = True
        for i, ta in enumerate(targets.values()):
            if fail_flag:
                # normalisation failed; no CC is available for this file
                ofs.write(" %7s %4d" % ("nan", 0))
            else:
                cc_num = calc_cc(iobs, ta)
                ofs.write(" % .4f %4d" % cc_num)
        ofs.write("\n")
def __init__(self, miller_obs, r_free_flags, out=None):
    self.out = out
    if self.out is None:
        self.out = sys.stdout
    if out == "silent":
        self.out = null_out()
    # the original miller array
    self.miller_obs = miller_obs
    if self.miller_obs.observation_type() is None:
        raise Sorry("Unknown observation type")
    # we make a working copy of the above miller array
    self.work_obs = self.miller_obs.deep_copy().set_observation_type(
        self.miller_obs)
    if not self.work_obs.is_xray_intensity_array():
        self.work_obs = self.work_obs.f_as_f_sq()
    if not self.miller_obs.is_xray_amplitude_array():
        self.miller_obs = self.miller_obs.f_sq_as_f()
    self.r_free_flags = r_free_flags
    # -----------------------
    # These calculations are needed for Wilson-based outlier rejection
    #
    # Normalize the data
    normalizer = absolute_scaling.kernel_normalisation(
        self.work_obs, auto_kernel=True)
    self.norma_work = self.work_obs.customized_copy(
        data=normalizer.normalised_miller.data()
             / normalizer.normalised_miller.epsilons().data().as_double())
    assert flex.min(self.norma_work.data()) >= 0
    # split things into centric and acentric sets please
    self.centric_work = self.norma_work.select_centric(
        ).set_observation_type(self.norma_work)
    self.acentric_work = self.norma_work.select_acentric(
        ).set_observation_type(self.norma_work)
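# The z-values (E^2-like quantities) built above have expectation 1,
# which is what the Wilson-based outlier tests rely on; a rough sanity
# check might read as follows (a sketch, with `norma_work` standing in
# for the self.norma_work attribute set above):
from scitbx.array_family import flex

z_mean = flex.mean(norma_work.data())
assert abs(z_mean - 1.0) < 0.1  # loose bound for well-behaved data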
def __init__(self, miller_obs, miller_calc, r_free_flags, ta_d,
             kernel_width_free_reflections=None,
             kernel_width_d_star_cubed=None,
             kernel_in_bin_centers=False,
             kernel_on_chebyshev_nodes=True,
             n_sampling_points=20,
             n_chebyshev_terms=10,
             use_sampling_sum_weights=False,
             make_checks_and_clean_up=True):
    assert [kernel_width_free_reflections,
            kernel_width_d_star_cubed].count(None) == 1
    self.miller_obs = miller_obs
    self.miller_calc = abs(miller_calc)
    self.r_free_flags = r_free_flags
    self.kernel_width_free_reflections = kernel_width_free_reflections
    self.kernel_width_d_star_cubed = kernel_width_d_star_cubed
    self.n_chebyshev_terms = n_chebyshev_terms
    self.ta_d = ta_d

    if make_checks_and_clean_up:
        self.miller_obs = self.miller_obs.map_to_asu()
        self.miller_calc = self.miller_calc.map_to_asu()
        self.r_free_flags = self.r_free_flags.map_to_asu()
        assert self.r_free_flags.indices().all_eq(self.miller_obs.indices())
        self.miller_calc = self.miller_calc.common_set(self.miller_obs)
        assert self.r_free_flags.indices().all_eq(self.miller_calc.indices())
        assert self.miller_obs.is_real_array()
        if self.miller_obs.is_xray_intensity_array():
            self.miller_obs = self.miller_obs.f_sq_as_f()
        assert self.miller_obs.observation_type() is None or \
            self.miller_obs.is_xray_amplitude_array()

    if self.miller_calc.observation_type() is None:
        self.miller_calc = self.miller_calc.set_observation_type(
            self.miller_obs)

    # get normalized data please
    self.normalized_obs_f = absolute_scaling.kernel_normalisation(
        self.miller_obs, auto_kernel=True)
    self.normalized_obs = \
        self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f()
    self.normalized_calc_f = absolute_scaling.kernel_normalisation(
        self.miller_calc, auto_kernel=True)
    self.normalized_calc = \
        self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f()

    # get the 'free data'
    self.free_norm_obs = self.normalized_obs.select(self.r_free_flags.data())
    self.free_norm_calc = self.normalized_calc.select(self.r_free_flags.data())
    if self.free_norm_obs.data().size() <= 0:
        raise RuntimeError("No free reflections.")

    # In this variant the kernel-width setup, per-point sigmaa estimation
    # and Chebyshev smoothing of the full estimator (see the complete
    # constructor further below) are disabled; the smoothed sigmaa values
    # are instead supplied directly via ta_d.
    self.alpha = None
    self.beta = None
    self.fom_array = None
    self.ta_d_miller = self.miller_obs.array(data=self.ta_d)
def run(params, mtzfiles):
    arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)
    if params.take_common:
        arrays = commonalize(arrays)
    maxlen_f = max(map(lambda x: len(x[0]), arrays))
    ref_f_obs = arrays[0][1]

    scales = []
    for f, f_obs, f_model, flag in arrays:
        if ref_f_obs == f_obs:
            k, B = 1., 0
        else:
            k, B = kBdecider(ref_f_obs, f_obs).run()
        scales.append((k, B))

    if params.reference != "first":
        if params.reference == "bmin":    # scale to strongest
            kref, bref = max(scales, key=lambda x: x[1])
        elif params.reference == "bmax":  # scale to weakest
            kref, bref = min(scales, key=lambda x: x[1])
        elif params.reference == "bmed":  # scale to the median
            perm = range(len(scales))
            perm.sort(key=lambda i: scales[i][1])
            kref, bref = scales[perm[len(perm)//2]]
        else:
            raise RuntimeError("Never reaches here")
        print "# Set K=%.2f B=%.2f as reference" % (kref, bref)
        # not bref-x[1], because negated later
        scales = map(lambda x: (x[0]/kref, x[1]-bref), scales)

    print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
    for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
        d_star_sq = f_obs.d_star_sq().data()
        scale = k * flex.exp(-B*d_star_sq)
        # Normalized
        #f_obs.setup_binner(auto_binning=True)
        #f_model.setup_binner(auto_binning=True)
        #e_obs, e_model = map(lambda x: x.quasi_normalize_structure_factors(), (f_obs, f_model))
        e_obs = absolute_scaling.kernel_normalisation(
            f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None),
            auto_kernel=True)
        e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
        e_model = absolute_scaling.kernel_normalisation(
            f_model.customized_copy(data=f_model.data()*scale, sigmas=None),
            auto_kernel=True)
        e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()

        f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
        f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())
        e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
        e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())

        r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
        r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))
        cc_work_E = calc_cc(e_obs_w, e_model_w, False)
        cc_free_E = calc_cc(e_obs_t, e_model_t, False)
        #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
        #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)

        se = calc_sigmaa(f_obs, f_model, flag)
        sigmaa = flex.mean(se.sigmaa().data())
        fom = flex.mean(se.fom().data())
        print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (
            f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)
def do_clustering(self, nproc=1, b_scale=False, use_normalized=False,
                  html_maker=None):
    self.clusters = {}
    prefix = os.path.join(self.wdir, "cctable")
    assert (b_scale, use_normalized).count(True) <= 1

    if len(self.arrays) < 2:
        print "WARNING: less than two data! can't do cc-based clustering"
        self.clusters[1] = [float("nan"), [0]]
        return

    # Absolute scaling using Wilson-B factor
    if b_scale:
        from mmtbx.scaling.matthews import p_vm_calculator
        from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling
        ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
        n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
        ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
        ofs_wilson.write("file wilsonB\n")
        for f in self.arrays:
            arr = self.arrays[f]
            iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
            wilson_b = iso_scale_and_b.b_wilson
            ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
            if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                tmp = flex.exp(-2. * wilson_b
                               * arr.unit_cell().d_star_sq(arr.indices()) / 4.)
                self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                     sigmas=arr.sigmas() * tmp)
        ofs_wilson.close()
    elif use_normalized:
        from mmtbx.scaling.absolute_scaling import kernel_normalisation
        for f in self.arrays:
            arr = self.arrays[f]
            normaliser = kernel_normalisation(arr, auto_kernel=True)
            self.arrays[f] = arr.customized_copy(
                data=arr.data() / normaliser.normalizer_for_miller_array,
                sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)

    # Prep
    args = []
    for i in xrange(len(self.arrays) - 1):
        for j in xrange(i + 1, len(self.arrays)):
            args.append((i, j))

    # Calc all CC
    if self.use_sfdist:
        worker = lambda x: calc_sfdist(self.arrays.values()[x[0]],
                                       self.arrays.values()[x[1]])
    else:
        worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                                   self.arrays.values()[x[1]])
    results = easy_mp.pool_map(fixed_func=worker, args=args, processes=nproc)

    # Check NaN and decide which data to remove
    idx_bad = {}
    nans = []
    cc_data_for_html = []
    for (i, j), (cc, nref) in zip(args, results):
        cc_data_for_html.append((i, j, cc, nref))
        if cc == cc:  # not NaN
            continue
        idx_bad[i] = idx_bad.get(i, 0) + 1
        idx_bad[j] = idx_bad.get(j, 0) + 1
        nans.append([i, j])

    if html_maker is not None:
        html_maker.add_cc_clustering_details(cc_data_for_html)

    idx_bad = idx_bad.items()
    idx_bad.sort(key=lambda x: x[1])
    remove_idxes = set()
    for idx, badcount in reversed(idx_bad):
        if len(filter(lambda x: idx in x, nans)) == 0:
            continue
        remove_idxes.add(idx)
        nans = filter(lambda x: idx not in x, nans)
        if len(nans) == 0:
            break

    use_idxes = filter(lambda x: x not in remove_idxes,
                       xrange(len(self.arrays)))

    # Make table: original index (in file list) -> new index (in matrix)
    count = 0
    org2now = collections.OrderedDict()
    for i in xrange(len(self.arrays)):
        if i in remove_idxes:
            continue
        org2now[i] = count
        count += 1

    if len(remove_idxes) > 0:
        open("%s_notused.lst" % prefix, "w").write("\n".join(
            map(lambda x: self.arrays.keys()[x], remove_idxes)))

    # Make matrix
    mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
    for (i, j), (cc, nref) in zip(args, results):
        if i in remove_idxes or j in remove_idxes:
            continue
        mat[org2now[j], org2now[i]] = cc
    open("%s.matrix" % prefix, "w").write(" ".join(
        map(lambda x: "%.4f" % x, mat.flatten())))

    ofs = open("%s.dat" % prefix, "w")
    ofs.write(" i j cc nref\n")
    for (i, j), (cc, nref) in zip(args, results):
        ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

    open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree) {  # stolen from $CCP4/share/blend/R/blend0.R
  groups <- list()
  itree <- dim(htree$merge)[1]
  for (i in 1:itree) {
    il <- htree$merge[i,1]
    ir <- htree$merge[i,2]
    if (il < 0) lab1 <- htree$labels[-il]
    if (ir < 0) lab2 <- htree$labels[-ir]
    if (il > 0) lab1 <- groups[[il]]
    if (ir > 0) lab2 <- groups[[ir]]
    lab <- c(lab1,lab2)
    lab <- as.integer(lab)
    groups <- c(groups,list(lab))
  }
  return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber Nds Clheight IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups)) {
  sorted_groups <- sort(groups[[i]])
  linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                   i,length(groups[[i]]),hc$height[i],
                   paste(sorted_groups,collapse=" "))
  cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")
q(save="yes")
""" % dict(prefix=os.path.basename(prefix), ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

    call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
         wdir=self.wdir)

    output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
    for l in output[1:]:
        sp = l.split()
        clid, clheight, ids = sp[0], sp[2], sp[3:]
        self.clusters[int(clid)] = [float(clheight), map(int, ids)]
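# A minimal sketch (not from the original source) of the Wilson-B
# absolute scaling applied in the b_scale branch above, on a plain flex
# array; B value and d-spacings are made-up numbers:
from scitbx.array_family import flex

wilson_b = 20.0
d_spacings = flex.double([3.0, 2.0, 1.5])
d_star_sq = 1.0 / (d_spacings * d_spacings)
# intensities scale as exp(-2B sin^2(theta)/lambda^2) = exp(-2B d*^2/4)
scale = flex.exp(-2.0 * wilson_b * d_star_sq / 4.0)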
def __init__(self, miller_obs, miller_calc, r_free_flags,
             kernel_width_free_reflections=None,
             kernel_width_d_star_cubed=None,
             kernel_in_bin_centers=False,
             kernel_on_chebyshev_nodes=True,
             n_sampling_points=20,
             n_chebyshev_terms=10,
             use_sampling_sum_weights=False,
             make_checks_and_clean_up=True):
    assert [kernel_width_free_reflections,
            kernel_width_d_star_cubed].count(None) == 1
    self.miller_obs = miller_obs
    self.miller_calc = abs(miller_calc)
    self.r_free_flags = r_free_flags
    self.kernel_width_free_reflections = kernel_width_free_reflections
    self.kernel_width_d_star_cubed = kernel_width_d_star_cubed
    self.n_chebyshev_terms = n_chebyshev_terms

    if make_checks_and_clean_up:
        self.miller_obs = self.miller_obs.map_to_asu()
        self.miller_calc = self.miller_calc.map_to_asu()
        self.r_free_flags = self.r_free_flags.map_to_asu()
        assert self.r_free_flags.indices().all_eq(self.miller_obs.indices())
        self.miller_calc = self.miller_calc.common_set(self.miller_obs)
        assert self.r_free_flags.indices().all_eq(self.miller_calc.indices())
        assert self.miller_obs.is_real_array()
        if self.miller_obs.is_xray_intensity_array():
            self.miller_obs = self.miller_obs.f_sq_as_f()
        assert self.miller_obs.observation_type() is None or \
            self.miller_obs.is_xray_amplitude_array()

    if self.miller_calc.observation_type() is None:
        self.miller_calc = self.miller_calc.set_observation_type(
            self.miller_obs)

    # get normalized data please
    self.normalized_obs_f = absolute_scaling.kernel_normalisation(
        self.miller_obs, auto_kernel=True)
    self.normalized_obs = \
        self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f()
    self.normalized_calc_f = absolute_scaling.kernel_normalisation(
        self.miller_calc, auto_kernel=True)
    self.normalized_calc = \
        self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f()

    # get the 'free data'
    if (self.r_free_flags.data().count(True) == 0):
        self.r_free_flags = self.r_free_flags.array(
            data=~self.r_free_flags.data())
    self.free_norm_obs = self.normalized_obs.select(self.r_free_flags.data())
    self.free_norm_calc = self.normalized_calc.select(self.r_free_flags.data())
    if self.free_norm_obs.data().size() <= 0:
        raise RuntimeError("No free reflections.")

    if (self.kernel_width_d_star_cubed is None):
        self.kernel_width_d_star_cubed = \
            sigmaa_estimator_kernel_width_d_star_cubed(
                r_free_flags=self.r_free_flags,
                kernel_width_free_reflections=self.kernel_width_free_reflections)

    self.sigma_target_functor = ext.sigmaa_estimator(
        e_obs=self.free_norm_obs.data(),
        e_calc=self.free_norm_calc.data(),
        centric=self.free_norm_obs.centric_flags().data(),
        d_star_cubed=self.free_norm_obs.d_star_cubed().data(),
        width=self.kernel_width_d_star_cubed)

    d_star_cubed_overall = self.miller_obs.d_star_cubed().data()
    self.min_h = flex.min(d_star_cubed_overall)
    self.max_h = flex.max(d_star_cubed_overall)
    self.h_array = None
    if (kernel_in_bin_centers):
        self.h_array = flex.double(range(1, n_sampling_points * 2, 2)) * (
            self.max_h - self.min_h) / (n_sampling_points * 2) + self.min_h
    else:
        self.min_h *= 0.99
        self.max_h *= 1.01
        if kernel_on_chebyshev_nodes:
            self.h_array = chebyshev_lsq_fit.chebyshev_nodes(
                n=n_sampling_points,
                low=self.min_h,
                high=self.max_h,
                include_limits=True)
        else:
            self.h_array = flex.double(range(n_sampling_points)) * (
                self.max_h - self.min_h) / float(n_sampling_points - 1.0) \
                + self.min_h

    assert self.h_array.size() == n_sampling_points
    self.sigmaa_array = flex.double()
    self.sigmaa_array.reserve(self.h_array.size())
    self.sum_weights = flex.double()
    self.sum_weights.reserve(self.h_array.size())

    for h in self.h_array:
        stimator = sigmaa_point_estimator(self.sigma_target_functor, h)
        self.sigmaa_array.append(stimator.sigmaa)
        self.sum_weights.append(
            self.sigma_target_functor.sum_weights(d_star_cubed=h))

    # fit a smooth function
    reparam_sa = -flex.log(1.0 / self.sigmaa_array - 1.0)
    if (use_sampling_sum_weights):
        w_obs = flex.sqrt(self.sum_weights)
    else:
        w_obs = None
    fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_terms=self.n_chebyshev_terms,
        x_obs=self.h_array,
        y_obs=reparam_sa,
        w_obs=w_obs)

    cheb_pol = chebyshev_polynome(
        self.n_chebyshev_terms,
        self.min_h,
        self.max_h,
        fit_lsq.coefs)

    def reverse_reparam(values):
        return 1.0 / (1.0 + flex.exp(-values))

    self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
    self.sigmaa_miller_array = reverse_reparam(cheb_pol.f(d_star_cubed_overall))
    assert flex.min(self.sigmaa_miller_array) >= 0
    assert flex.max(self.sigmaa_miller_array) <= 1
    self.sigmaa_miller_array = self.miller_obs.array(
        data=self.sigmaa_miller_array)

    self.alpha = None
    self.beta = None
    self.fom_array = None
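# Quick numeric check (not from the original source) of the logistic
# reparameterisation used above: reverse_reparam maps any real value
# into (0, 1), so the fitted sigmaa always stays a valid correlation:
from scitbx.array_family import flex

vals = flex.double([-2.0, 0.0, 2.0])
sig = 1.0 / (1.0 + flex.exp(-vals))
assert flex.min(sig) > 0 and flex.max(sig) < 1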
def do_clustering(self, nproc=1, b_scale=False, use_normalized=False,
                  cluster_method="ward", distance_eqn="sqrt(1-cc)",
                  min_common_refs=3, html_maker=None):
    """
    Using correlation as distance metric (for hierarchical clustering)
    https://stats.stackexchange.com/questions/165194/using-correlation-as-distance-metric-for-hierarchical-clustering
    Correlation "Distances" and Hierarchical Clustering
    http://research.stowers.org/mcm/efg/R/Visualization/cor-cluster/index.htm
    """
    self.clusters = {}
    prefix = os.path.join(self.wdir, "cctable")
    assert (b_scale, use_normalized).count(True) <= 1

    distance_eqns = {
        "sqrt(1-cc)": lambda x: numpy.sqrt(1. - x),
        "1-cc": lambda x: 1. - x,
        "sqrt(1-cc^2)": lambda x: numpy.sqrt(1. - x**2),
    }
    cc_to_distance = distance_eqns[distance_eqn]  # Fail when unknown options
    # available methods in scipy
    assert cluster_method in ("single", "complete", "average", "weighted",
                              "centroid", "median", "ward")

    if len(self.arrays) < 2:
        print "WARNING: less than two data! can't do cc-based clustering"
        self.clusters[1] = [float("nan"), [0]]
        return

    # Absolute scaling using Wilson-B factor
    if b_scale:
        from mmtbx.scaling.matthews import p_vm_calculator
        from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling
        ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
        n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
        ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
        ofs_wilson.write("file wilsonB\n")
        for f in self.arrays:
            arr = self.arrays[f]
            iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
            wilson_b = iso_scale_and_b.b_wilson
            ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
            if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                tmp = flex.exp(-2. * wilson_b
                               * arr.unit_cell().d_star_sq(arr.indices()) / 4.)
                self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                     sigmas=arr.sigmas() * tmp)
        ofs_wilson.close()
    elif use_normalized:
        from mmtbx.scaling.absolute_scaling import kernel_normalisation
        failed = {}
        for f in self.arrays:
            arr = self.arrays[f]
            try:
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(
                    data=arr.data() / normaliser.normalizer_for_miller_array,
                    sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)
            except Exception, e:
                failed.setdefault(e.message, []).append(f)
        if failed:
            msg = ""
            for r in failed:
                msg += " %s\n%s\n" % (r, "\n".join(
                    map(lambda x: "  %s" % x, failed[r])))
            raise Sorry(
                "intensity normalization failed by following reason(s):\n%s"
                % msg)
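# A minimal sketch (assuming numpy/scipy are available, as the options
# above imply) of turning a pairwise CC matrix into the hierarchical
# clustering that the accompanying R script performs with hclust; the
# 3x3 matrix is illustrative:
import numpy
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

cc = numpy.array([[1.0, 0.9, 0.2],
                  [0.9, 1.0, 0.3],
                  [0.2, 0.3, 1.0]])
dist = numpy.sqrt(1.0 - cc)        # the "sqrt(1-cc)" option above
numpy.fill_diagonal(dist, 0.0)     # squareform needs an exact zero diagonal
linkage = hierarchy.linkage(squareform(dist), method="ward")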
class basic_analyses(object):  # XXX is this ever used?
    def __init__(self, miller_array, phil_object, out=None, out_plot=None,
                 miller_calc=None, original_intensities=None,
                 completeness_as_non_anomalous=None, verbose=0):
        if out is None:
            out = sys.stdout
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Matthews coefficient and Solvent content statistics"
        n_copies_solc = 1.0
        self.nres_known = False
        if (phil_object.scaling.input.asu_contents.n_residues is not None or
                phil_object.scaling.input.asu_contents.n_bases is not None):
            self.nres_known = True
            if (phil_object.scaling.input.asu_contents.sequence_file is not None):
                print >> out, " warning: ignoring sequence file"
        elif (phil_object.scaling.input.asu_contents.sequence_file is not None):
            print >> out, " determining composition from sequence file %s" % \
                phil_object.scaling.input.asu_contents.sequence_file
            seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
                file_name=phil_object.scaling.input.asu_contents.sequence_file,
                log=out)
            if (seq_comp is not None):
                phil_object.scaling.input.asu_contents.n_residues = seq_comp.n_residues
                phil_object.scaling.input.asu_contents.n_bases = seq_comp.n_bases
                self.nres_known = True

        matthews_results = matthews.matthews_rupp(
            crystal_symmetry=miller_array,
            n_residues=phil_object.scaling.input.asu_contents.n_residues,
            n_bases=phil_object.scaling.input.asu_contents.n_bases,
            out=out, verbose=1)
        phil_object.scaling.input.asu_contents.n_residues = matthews_results[0]
        phil_object.scaling.input.asu_contents.n_bases = matthews_results[1]
        n_copies_solc = matthews_results[2]
        self.matthews_results = matthews_results

        if phil_object.scaling.input.asu_contents.n_copies_per_asu is not None:
            n_copies_solc = phil_object.scaling.input.asu_contents.n_copies_per_asu
            self.defined_copies = n_copies_solc
            if verbose > 0:
                print >> out, "Number of copies per asymmetric unit provided"
                print >> out, " Will use user specified value of ", n_copies_solc
        else:
            phil_object.scaling.input.asu_contents.n_copies_per_asu = n_copies_solc
            self.guessed_copies = n_copies_solc

        # first report on I over sigma
        miller_array_new = miller_array
        self.data_strength = None
        miller_array_intensities = miller_array
        if (original_intensities is not None):
            assert original_intensities.is_xray_intensity_array()
            miller_array_intensities = original_intensities
        if miller_array_intensities.sigmas() is not None:
            data_strength = data_statistics.i_sigi_completeness_stats(
                miller_array_intensities,
                isigi_cut=phil_object.scaling.input.parameters.
                    misc_twin_parameters.twin_test_cuts.isigi_cut,
                completeness_cut=phil_object.scaling.input.parameters.
                    misc_twin_parameters.twin_test_cuts.completeness_cut,
                completeness_as_non_anomalous=completeness_as_non_anomalous)
            data_strength.show(out)
            self.data_strength = data_strength
            if phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution is None:
                if data_strength.resolution_cut > data_strength.resolution_at_least:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_at_least
                else:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_cut

        ## Isotropic Wilson scaling
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Maximum likelihood isotropic Wilson scaling "
        n_residues = phil_object.scaling.input.asu_contents.n_residues
        n_bases = phil_object.scaling.input.asu_contents.n_bases
        if n_residues is None:
            n_residues = 0
        if n_bases is None:
            n_bases = 0
        if n_bases + n_residues == 0:
            raise Sorry("No scatterers available")
        iso_scale_and_b = absolute_scaling.ml_iso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() * n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() * n_copies_solc)
        iso_scale_and_b.show(out=out, verbose=verbose)
        self.iso_scale_and_b = iso_scale_and_b
        ## Store the b and scale values from isotropic ML scaling
        self.iso_p_scale = iso_scale_and_b.p_scale
        self.iso_b_wilson = iso_scale_and_b.b_wilson

        ## Anisotropic ML Wilson scaling
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Maximum likelihood anisotropic Wilson scaling "
        aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() * n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() * n_copies_solc)
        aniso_scale_and_b.show(out=out, verbose=1)
        self.aniso_scale_and_b = aniso_scale_and_b

        try:
            b_cart = aniso_scale_and_b.b_cart
        except AttributeError, e:
            print >> out, "*** ERROR ***"
            print >> out, str(e)
            show_exception_info_if_full_testing()
            return

        self.aniso_p_scale = aniso_scale_and_b.p_scale
        self.aniso_u_star = aniso_scale_and_b.u_star
        self.aniso_b_cart = aniso_scale_and_b.b_cart
        # XXX: for GUI
        self.overall_b_cart = getattr(aniso_scale_and_b, "overall_b_cart", None)

        ## Correcting for anisotropy
        if verbose > 0:
            print >> out, "Correcting for anisotropy in the data"
            print >> out
        b_cart_observed = aniso_scale_and_b.b_cart
        b_trace_average = (b_cart_observed[0] +
                           b_cart_observed[1] +
                           b_cart_observed[2]) / 3.0
        b_trace_min = b_cart_observed[0]
        if b_cart_observed[1] < b_trace_min:
            b_trace_min = b_cart_observed[1]
        if b_cart_observed[2] < b_trace_min:
            b_trace_min = b_cart_observed[2]

        if phil_object.scaling.input.optional.aniso.final_b == "eigen_min":
            b_use = aniso_scale_and_b.eigen_values[2]
        elif phil_object.scaling.input.optional.aniso.final_b == "eigen_mean":
            b_use = flex.mean(aniso_scale_and_b.eigen_values)
        elif phil_object.scaling.input.optional.aniso.final_b == "user_b_iso":
            assert phil_object.scaling.input.optional.aniso.b_iso is not None
            b_use = phil_object.scaling.input.optional.aniso.b_iso
        else:
            b_use = 30

        b_cart_aniso_removed = [-b_use, -b_use, -b_use, 0, 0, 0]
        u_star_aniso_removed = adptbx.u_cart_as_u_star(
            miller_array.unit_cell(), adptbx.b_as_u(b_cart_aniso_removed))
        ## I do things in two steps, but this can easily be done in one step;
        ## two steps is just for clarity, that's all.
        self.no_aniso_array = absolute_scaling.anisotropic_correction(
            miller_array_new, 0.0, aniso_scale_and_b.u_star)
        self.no_aniso_array = absolute_scaling.anisotropic_correction(
            self.no_aniso_array, 0.0, u_star_aniso_removed)
        self.no_aniso_array = self.no_aniso_array.set_observation_type(
            miller_array)

        ## Make normalised structure factors please
        sel_big = self.no_aniso_array.data() > 1.e+50
        self.no_aniso_array = self.no_aniso_array.array(
            data=self.no_aniso_array.data().set_selected(sel_big, 0))
        self.no_aniso_array = self.no_aniso_array.set_observation_type(
            miller_array)
        normalisation = absolute_scaling.kernel_normalisation(
            self.no_aniso_array, auto_kernel=True)
        self.normalised_miller = normalisation.normalised_miller.deep_copy()

        self.phil_object = phil_object

        ## Some basic statistics and sanity checks follow
        if verbose > 0:
            print >> out, "Some basic intensity statistics follow."
            print >> out
        basic_data_stats = data_statistics.basic_intensity_statistics(
            miller_array,
            aniso_scale_and_b.p_scale,
            aniso_scale_and_b.u_star,
            iso_scale_and_b.scat_info,
            out=out,
            out_plot=out_plot)
        self.basic_data_stats = basic_data_stats
        self.miller_array = basic_data_stats.new_miller

        # relative Wilson plot
        self.rel_wilson = None
        if (miller_calc is not None) and (miller_calc.d_min() < 4.0):
            try:
                self.rel_wilson = relative_wilson.relative_wilson(
                    miller_obs=miller_array, miller_calc=miller_calc)
            except RuntimeError, e:
                print >> out, "*** Error calculating relative Wilson plot - skipping."
                print >> out, ""
def kernel_normalisation(self):
    normalisation = absolute_scaling.kernel_normalisation(
        self.intensities, auto_kernel=True)
    self.intensities = normalisation.normalised_miller.deep_copy().set_info(
        self.intensities.info())
def run(args):
    import libtbx
    from libtbx import easy_pickle
    from dials.util import log
    from dials.util.options import OptionParser

    parser = OptionParser(
        #usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        #epilog=help_message
    )
    params, options, args = parser.parse_args(show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity, info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    if params.save_plot and not params.animate:
        import matplotlib
        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        matplotlib.use('Agg')  # use a non-interactive backend

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        if len(experiments) > len(reflections):
            flattened_reflections = []
            for refl in reflections:
                for i in range(0, flex.max(refl['id']) + 1):
                    sel = refl['id'] == i
                    flattened_reflections.append(refl.select(sel))
            reflections = flattened_reflections

        assert len(experiments) == len(reflections)

        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())

            if 0 and 'intensity.prf.value' in refl:
                sel = refl.get_flags(refl.flags.integrated_prf)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.prf.value']
                variances = refl['intensity.prf.variance']
            else:
                assert 'intensity.sum.value' in refl
                sel = refl.get_flags(refl.flags.integrated_sum)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.sum.value']
                variances = refl['intensity.sum.variance']

            # FIXME probably need to do some filtering of intensities similar
            # to that done in export_mtz
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry, miller_indices,
                                    anomalous_flag=False)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source='DIALS', source_type='pickle'))
            datasets_input.append(intensities)

    files = args
    for file_name in files:
        try:
            data = easy_pickle.load(file_name)
            intensities = data['observations'][0]
            intensities.set_info(
                miller.array_info(source=file_name, source_type='pickle'))
            intensities = intensities.customized_copy(
                anomalous_flag=False).set_info(intensities.info())
            batches = None
        except Exception:
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'

            as_miller_arrays = reader.as_miller_arrays(merge_equivalents=False)
            intensities = [ma for ma in as_miller_arrays
                           if ma.info().labels == ['I', 'SIGI']][0]
            batches = [ma for ma in as_miller_arrays
                       if ma.info().labels == ['BATCH']]
            if len(batches):
                batches = batches[0]
            else:
                batches = None

            mtz_object = reader.file_content()
            intensities = intensities.customized_copy(
                anomalous_flag=False,
                indices=mtz_object.extract_original_index_miller_indices()
            ).set_info(intensities.info())
            intensities.set_observation_type_xray_intensity()
        datasets_input.append(intensities)

    if len(datasets_input) == 0:
        raise Sorry('No valid reflection files provided on command line')

    datasets = []
    for intensities in datasets_input:
        if params.batch is not None:
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        if params.min_i_mean_over_sigma_mean is not None and (
                params.d_min is libtbx.Auto or params.d_min is not None):
            from xia2.Modules import Resolutionizer
            rparams = Resolutionizer.phil_defaults.extract().resolutionizer
            rparams.nbins = 20
            resolutionizer = Resolutionizer.resolutionizer(
                intensities, None, rparams)
            i_mean_over_sigma_mean = 4
            d_min = resolutionizer.resolution_i_mean_over_sigma_mean(
                i_mean_over_sigma_mean)
            if params.d_min is libtbx.Auto:
                intensities = intensities.resolution_filter(
                    d_min=d_min).set_info(intensities.info())
                if params.verbose:
                    logger.info('Selecting reflections with d > %.2f' % d_min)
            elif d_min > params.d_min:
                logger.info('Rejecting dataset %s as d_min too low (%.2f)'
                            % (file_name, d_min))
                continue
            else:
                logger.info('Estimated d_min for %s: %.2f'
                            % (file_name, d_min))
        elif params.d_min not in (None, libtbx.Auto):
            intensities = intensities.resolution_filter(
                d_min=params.d_min).set_info(intensities.info())

        if params.normalisation == 'kernel':
            from mmtbx.scaling import absolute_scaling
            normalisation = absolute_scaling.kernel_normalisation(
                intensities, auto_kernel=True)
            intensities = normalisation.normalised_miller.deep_copy()

        cb_op_to_primitive = \
            intensities.change_of_basis_op_to_primitive_setting()
        intensities = intensities.change_basis(cb_op_to_primitive)

        if params.mode == 'full' or params.space_group is not None:
            if params.space_group is not None:
                space_group_info = params.space_group.primitive_setting()
                if not space_group_info.group().is_compatible_unit_cell(
                        intensities.unit_cell()):
                    logger.info(
                        'Skipping data set - incompatible space group and unit cell: %s, %s'
                        % (space_group_info, intensities.unit_cell()))
                    continue
            else:
                space_group_info = sgtbx.space_group_info('P1')
            intensities = intensities.customized_copy(
                space_group_info=space_group_info)

        datasets.append(intensities)

    crystal_symmetries = [d.crystal_symmetry().niggli_cell() for d in datasets]
    lattice_ids = range(len(datasets))
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    threshold = 1000
    if params.save_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure("Andrews-Bernstein distance dendogram",
                         figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(params.unit_cell_clustering.threshold,
                                 log=params.unit_cell_clustering.log,
                                 write_file_lists=False,
                                 schnell=False,
                                 doplot=params.save_plot,
                                 ax=ax)
    if params.save_plot:
        plt.tight_layout()
        plt.savefig('%scluster_unit_cell.png' % params.plot_prefix)
        plt.close(fig)
    logger.info(unit_cell_info(clusters))

    largest_cluster = None
    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if largest_cluster_lattice_ids is None:
            largest_cluster_lattice_ids = cluster_lattice_ids
        elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
            largest_cluster_lattice_ids = cluster_lattice_ids

    dataset_selection = largest_cluster_lattice_ids
    if len(dataset_selection) < len(datasets):
        logger.info('Selecting subset of data for cosym analysis: %s'
                    % str(dataset_selection))
        datasets = [datasets[i] for i in dataset_selection]

    # per-dataset change of basis operator to ensure all consistent
    change_of_basis_ops = []
    for i, dataset in enumerate(datasets):
        metric_subgroups = sgtbx.lattice_symmetry.metric_subgroups(
            dataset, max_delta=5)
        subgroup = metric_subgroups.result_groups[0]
        cb_op_inp_best = subgroup['cb_op_inp_best']
        datasets[i] = dataset.change_basis(cb_op_inp_best)
        change_of_basis_ops.append(cb_op_inp_best)

    cb_op_ref_min = datasets[0].change_of_basis_op_to_niggli_cell()
    for i, dataset in enumerate(datasets):
        if params.space_group is None:
            datasets[i] = dataset.change_basis(cb_op_ref_min).customized_copy(
                space_group_info=sgtbx.space_group_info('P1'))
        else:
            datasets[i] = dataset.change_basis(cb_op_ref_min)
            datasets[i] = datasets[i].customized_copy(
                crystal_symmetry=crystal.symmetry(
                    unit_cell=datasets[i].unit_cell(),
                    space_group_info=params.space_group.primitive_setting(),
                    assert_is_compatible_unit_cell=False))
        datasets[i] = datasets[i].merge_equivalents().array()
        change_of_basis_ops[i] = cb_op_ref_min * change_of_basis_ops[i]

    result = analyse_datasets(datasets, params)

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in result.reindexing_ops.iterkeys():
        if 0 in result.reindexing_ops[dataset_id]:
            cb_op = result.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, [])
            reindexing_ops[cb_op].append(dataset_id)
        if dataset_id in result.space_groups:
            space_groups.setdefault(result.space_groups[dataset_id], [])
            space_groups[result.space_groups[dataset_id]].append(dataset_id)

    logger.info('Space groups:')
    for sg, datasets in space_groups.iteritems():
        logger.info(str(sg.info().reference_setting()))
        logger.info(datasets)

    logger.info('Reindexing operators:')
    for cb_op, datasets in reindexing_ops.iteritems():
        logger.info(cb_op)
        logger.info(datasets)

    if (len(experiments) and len(reflections) and
            params.output.reflections is not None and
            params.output.experiments is not None):
        import copy
        from dxtbx.model import ExperimentList
        from dxtbx.serialize import dump
        reindexed_experiments = ExperimentList()
        reindexed_reflections = flex.reflection_table()
        expt_id = 0
        for cb_op, dataset_ids in reindexing_ops.iteritems():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                expt = experiments[dataset_selection[dataset_id]]
                refl = reflections[dataset_selection[dataset_id]]
                reindexed_expt = copy.deepcopy(expt)
                refl_reindexed = copy.deepcopy(refl)
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                reindexed_expt.crystal = reindexed_expt.crystal.change_basis(
                    cb_op_this)
                refl_reindexed['miller_index'] = cb_op_this.apply(
                    refl_reindexed['miller_index'])
                reindexed_experiments.append(reindexed_expt)
                refl_reindexed['id'] = flex.int(refl_reindexed.size(), expt_id)
                reindexed_reflections.extend(refl_reindexed)
                expt_id += 1
        logger.info('Saving reindexed experiments to %s'
                    % params.output.experiments)
        dump.experiment_list(reindexed_experiments, params.output.experiments)
        logger.info('Saving reindexed reflections to %s'
                    % params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        for cb_op, dataset_ids in reindexing_ops.iteritems():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                file_name = files[dataset_selection[dataset_id]]
                basename = os.path.basename(file_name)
                out_name = os.path.splitext(basename)[0] + \
                    params.output.suffix + '_' + \
                    str(dataset_selection[dataset_id]) + ".mtz"
                reader = any_reflection_file(file_name)
                assert reader.file_type() == 'ccp4_mtz'
                mtz_object = reader.file_content()
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                if not cb_op_this.is_identity_op():
                    logger.info('reindexing %s (%s)'
                                % (file_name, cb_op_this.as_xyz()))
                    mtz_object.change_basis_in_place(cb_op_this)
                mtz_object.write(out_name)
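# Composing change-of-basis operators per dataset, as done above; a
# small self-contained sketch (the operators are illustrative):
from cctbx import sgtbx

cb1 = sgtbx.change_of_basis_op("x,y,z")    # identity
cb2 = sgtbx.change_of_basis_op("-x,-y,z")
cb_combined = cb2 * cb1                    # apply cb1, then cb2
assert not cb_combined.is_identity_op()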
def __init__(self, miller_obs, miller_calc, min_d_star_sq=0.0,
             max_d_star_sq=2.0, n_points=2000, level=6.0):
    assert miller_obs.indices().all_eq(miller_calc.indices())
    if (miller_obs.is_xray_amplitude_array()):
        miller_obs = miller_obs.f_as_f_sq()
    if (miller_calc.is_xray_amplitude_array()):
        miller_calc = miller_calc.f_as_f_sq()
    self.obs = miller_obs.deep_copy()
    self.calc = miller_calc.deep_copy()
    self.mind = min_d_star_sq
    self.maxd = max_d_star_sq
    self.m = n_points
    self.n = 2
    self.level = level

    norma_obs = absolute_scaling.kernel_normalisation(
        miller_array=self.obs, auto_kernel=True, n_bins=45, n_term=17)
    norma_calc = absolute_scaling.kernel_normalisation(
        miller_array=self.calc, auto_kernel=True, n_bins=45, n_term=17)
    obs_d_star_sq = norma_obs.d_star_sq_array
    calc_d_star_sq = norma_calc.d_star_sq_array

    sel_calc_obs = norma_calc.bin_selection.select(norma_obs.bin_selection)
    sel_obs_calc = norma_obs.bin_selection.select(norma_calc.bin_selection)
    # low_lim and high_lim are module-level resolution limits shared with
    # the Chebyshev coefficient tables (mean_coefs, std_coefs) used below
    sel = ((obs_d_star_sq > low_lim) &
           (obs_d_star_sq < high_lim) &
           (norma_obs.mean_I_array > 0))
    sel = sel.select(sel_calc_obs)

    self.obs_d_star_sq = obs_d_star_sq.select(sel)
    self.calc_d_star_sq = calc_d_star_sq.select(sel_obs_calc).select(sel)
    self.mean_obs = norma_obs.mean_I_array.select(sel)
    self.mean_calc = norma_calc.mean_I_array.select(sel_obs_calc).select(sel)
    self.var_obs = norma_obs.var_I_array.select(sel)
    self.var_calc = norma_calc.var_I_array.select(sel_obs_calc).select(sel)

    # make an interpolator object please
    self.interpol = scale_curves.curve_interpolator(
        self.mind, self.maxd, self.m)
    # do the interpolation
    tmp_obs_d_star_sq, self.mean_obs, self.obs_a, self.obs_b = \
        self.interpol.interpolate(self.obs_d_star_sq, self.mean_obs)
    self.obs_d_star_sq, self.var_obs, self.obs_a, self.obs_b = \
        self.interpol.interpolate(self.obs_d_star_sq, self.var_obs)
    tmp_calc_d_star_sq, self.mean_calc, self.calc_a, self.calc_b = \
        self.interpol.interpolate(self.calc_d_star_sq, self.mean_calc)
    self.calc_d_star_sq, self.var_calc, self.calc_a, self.calc_b = \
        self.interpol.interpolate(self.calc_d_star_sq, self.var_calc)

    self.mean_ratio_engine = chebyshev_polynome(
        mean_coefs.size(), low_lim - 1e-3, high_lim + 1e-3, mean_coefs)
    self.std_ratio_engine = chebyshev_polynome(
        std_coefs.size(), low_lim - 1e-3, high_lim + 1e-3, std_coefs)

    self.x = flex.double([0, 0])
    self.low_lim_for_scaling = 1.0 / (4.0 * 4.0)  # 0.0625
    selection = (self.calc_d_star_sq > self.low_lim_for_scaling)
    if (selection.count(True) == 0):
        raise Sorry("No reflections within required resolution range after "
                    "filtering.")
    self.weight_array = selection.as_double() / (2.0 * self.var_obs)
    assert (not self.weight_array.all_eq(0.0))

    self.mean = flex.double(
        [1.0 / (flex.sum(self.mean_calc) / flex.sum(self.mean_obs)), 0.0])
    self.sigmas = flex.double([0.5, 0.5])
    s = 1.0 / (flex.sum(self.weight_array * self.mean_calc) /
               flex.sum(self.weight_array * self.mean_obs))
    b = 0.0
    self.sart_simplex = [flex.double([s, b]),
                         flex.double([s + 0.1, b + 1.1]),
                         flex.double([s - 0.1, b - 1.1])]
    self.opti = simplex.simplex_opt(2, self.sart_simplex, self)
    sol = self.opti.get_solution()
    self.scale = abs(sol[0])
    self.b_value = sol[1]
    self.modify_weights()
    self.all_bad_z_scores = self.weight_array.all_eq(0.0)
    if (not self.all_bad_z_scores):
        s = 1.0 / (flex.sum(self.weight_array * self.mean_calc) /
                   flex.sum(self.weight_array * self.mean_obs))
        b = 0.0
        self.sart_simplex = [flex.double([s, b]),
                             flex.double([s + 0.1, b + 1.1]),
                             flex.double([s - 0.1, b - 1.1])]
        self.opti = simplex.simplex_opt(2, self.sart_simplex, self)
def do_clustering(self, nproc=1, b_scale=False, use_normalized=False,
                  html_maker=None):
  self.clusters = {}
  prefix = os.path.join(self.wdir, "cctable")
  assert (b_scale, use_normalized).count(True) <= 1

  if len(self.arrays) < 2:
    print "WARNING: less than two data! can't do cc-based clustering"
    self.clusters[1] = [float("nan"), [0]]
    return

  # Absolute scaling using Wilson-B factor
  if b_scale:
    from mmtbx.scaling.matthews import p_vm_calculator
    from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

    ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
    n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
    ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
    ofs_wilson.write("file wilsonB\n")
    for f in self.arrays:
      arr = self.arrays[f]
      iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
      wilson_b = iso_scale_and_b.b_wilson
      ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
      if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
        tmp = flex.exp(-2. * wilson_b *
                       arr.unit_cell().d_star_sq(arr.indices()) / 4.)
        self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                             sigmas=arr.sigmas() * tmp)
    ofs_wilson.close()
  elif use_normalized:
    from mmtbx.scaling.absolute_scaling import kernel_normalisation
    for f in self.arrays:
      arr = self.arrays[f]
      normaliser = kernel_normalisation(arr, auto_kernel=True)
      self.arrays[f] = arr.customized_copy(
        data=arr.data() / normaliser.normalizer_for_miller_array,
        sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)

  # Prep
  args = []
  for i in xrange(len(self.arrays) - 1):
    for j in xrange(i + 1, len(self.arrays)):
      args.append((i, j))

  # Calc all CC
  worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                             self.arrays.values()[x[1]])
  results = easy_mp.pool_map(fixed_func=worker, args=args, processes=nproc)

  # Check NaN and decide which data to remove
  idx_bad = {}
  nans = []
  cc_data_for_html = []
  for (i, j), (cc, nref) in zip(args, results):
    cc_data_for_html.append((i, j, cc, nref))
    if cc == cc: continue  # cc != cc catches NaN
    idx_bad[i] = idx_bad.get(i, 0) + 1
    idx_bad[j] = idx_bad.get(j, 0) + 1
    nans.append([i, j])

  if html_maker is not None:
    html_maker.add_cc_clustering_details(cc_data_for_html)

  idx_bad = idx_bad.items()
  idx_bad.sort(key=lambda x: x[1])
  remove_idxes = set()

  for idx, badcount in reversed(idx_bad):
    if len(filter(lambda x: idx in x, nans)) == 0: continue
    remove_idxes.add(idx)
    nans = filter(lambda x: idx not in x, nans)
    if len(nans) == 0: break

  use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

  # Make table: original index (in file list) -> new index (in matrix)
  count = 0
  org2now = collections.OrderedDict()
  for i in xrange(len(self.arrays)):
    if i in remove_idxes: continue
    org2now[i] = count
    count += 1

  if len(remove_idxes) > 0:
    open("%s_notused.lst" % prefix, "w").write(
      "\n".join(map(lambda x: self.arrays.keys()[x], remove_idxes)))

  # Make matrix
  mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
  for (i, j), (cc, nref) in zip(args, results):
    if i in remove_idxes or j in remove_idxes: continue
    mat[org2now[j], org2now[i]] = cc

  open("%s.matrix" % prefix, "w").write(
    " ".join(map(lambda x: "%.4f" % x, mat.flatten())))

  ofs = open("%s.dat" % prefix, "w")
  ofs.write(" i j cc nref\n")
  for (i, j), (cc, nref) in zip(args, results):
    ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

  open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree) {
  # stolen from $CCP4/share/blend/R/blend0.R
  groups <- list()
  itree <- dim(htree$merge)[1]
  for (i in 1:itree) {
    il <- htree$merge[i, 1]
    ir <- htree$merge[i, 2]
    if (il < 0) lab1 <- htree$labels[-il]
    if (ir < 0) lab2 <- htree$labels[-ir]
    if (il > 0) lab1 <- groups[[il]]
    if (ir > 0) lab2 <- groups[[ir]]
    lab <- c(lab1, lab2)
    lab <- as.integer(lab)
    groups <- c(groups, list(lab))
  }
  return(groups)
}

cc <- scan("%(prefix)s.matrix")
md <- matrix(1 - cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md), method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png", height=1000, width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber Nds Clheight IDs\\n", file="./CLUSTERS.txt")
for (i in 1:length(groups)) {
  sorted_groups <- sort(groups[[i]])
  linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                   i, length(groups[[i]]), hc$height[i],
                   paste(sorted_groups, collapse=" "))
  cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON <- function(hc) {
  labels <- hc$labels
  merge <- data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2], "))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", node", merge[i,2], "))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node", nrow(merge), ")")))
  return(JSON)
}

JSON <- HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

  call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
       wdir=self.wdir)

  output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
  for l in output[1:]:
    sp = l.split()
    clid, clheight, ids = sp[0], sp[2], sp[3:]
    self.clusters[int(clid)] = [float(clheight), map(int, ids)]
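# The R script written above does Ward clustering on a "1 - CC" distance
# matrix and records the tree in CLUSTERS.txt. For reference, a rough scipy
# equivalent of that clustering step; this is a sketch only (R's
# method="ward" is ward.D while scipy implements ward.D2, so heights will
# differ), and "cctable.matrix" simply matches the prefix used above:
import numpy
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

flat = numpy.loadtxt("cctable.matrix")
n = int(round(flat.size ** 0.5))
cc = flat.reshape(n, n)
cc = cc + cc.T                    # only the lower triangle was filled
dist = 1.0 - cc
numpy.fill_diagonal(dist, 0.0)
z = hierarchy.linkage(squareform(dist, checks=False), method="ward")
for clid, (left, right, height, nds) in enumerate(z, start=1):
  print("%04d %4d %7.3f" % (clid, int(nds), height))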
def _kernel_normalisation(self, miller_array, n_bins=45, n_term=17):
  return absolute_scaling.kernel_normalisation(
    miller_array=miller_array, auto_kernel=True, n_bins=n_bins, n_term=n_term)
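# A runnable sketch of the kernel normalisation wrapped by the helper above,
# exercised on synthetic data; the random structure, element list and
# resolution limit are illustrative assumptions, not part of the original:
from cctbx import sgtbx
from cctbx.array_family import flex
from cctbx.development import random_structure
from mmtbx.scaling import absolute_scaling

xrs = random_structure.xray_structure(
  space_group_info=sgtbx.space_group_info("P 21 21 21"),
  elements=["O"] * 50, volume_per_atom=50.0)
f_obs = xrs.structure_factors(d_min=2.0).f_calc().amplitudes() \
           .set_observation_type_xray_amplitude()
norma = absolute_scaling.kernel_normalisation(
  miller_array=f_obs, auto_kernel=True, n_bins=45, n_term=17)
# normalised_miller_dev_eps holds the eps-corrected, E^2-like data, so its
# mean should come out close to 1.0; normalizer_for_miller_array is the
# smooth <I> curve used as the divisor.
print(flex.mean(norma.normalised_miller_dev_eps.data()))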