def test_center_of_coordinates_shift(self):
    """Test shifting translations to and from the master NCS copy's center.

    Checks, in order, that the first two centers returned by
    ``nu.get_ncs_groups_centers`` lie on the coordinate sum scaled by
    ``1 / n_atoms``, that ``nu.shift_translation_to_center`` gives the
    expected translation for the first copy of the first group, and that
    ``nu.shift_translation_back_to_place`` reproduces the starting
    translation.
    """
    structure = self.pdb_inp.xray_structure_simple()
    groups = self.ncs_restraints_group_list
    shifts = nu.get_ncs_groups_centers(
        xray_structure=structure,
        ncs_restraints_group_list=groups)
    sites = self.pdb_inp.atoms().extract_xyz()
    summed = flex.vec3_double([sites.sum()])
    center_of_coor = (summed * (1 / sites.size())).round(8)

    def offset_stats(shift):
        # (min, max, mean) distance between the reference center and a shift.
        rounded = shift.round(8)
        return flex.sqrt(
            (center_of_coor - rounded).dot()).min_max_mean().as_tuple()

    # The reported centers must coincide with the reference center.
    d1 = offset_stats(shifts[0])
    d2 = offset_stats(shifts[1])
    assert (d1 == d2) and (d1 == (0, 0, 0))

    # Shift to the center and compare with the known translation.
    centered = nu.shift_translation_to_center(
        shifts=shifts,
        ncs_restraints_group_list=groups)
    expected = (-4.62169, -5.42257, 5.288)
    assert (centered[0].copies[0].t.round(5)).elems == expected

    # Shift back: the original translation must be recovered.
    restored = nu.shift_translation_back_to_place(
        shifts=shifts,
        ncs_restraints_group_list=centered)
    expected = (restored[0].copies[0].t.round(5)).elems
    result = (groups[0].copies[0].t.round(5)).elems
    assert result == expected
def run(prefix):
    """Exercise standard (cctbx-based restraints) refinement with all defaults.

    Runs the refinement command for ``prefix``, then checks that the refined
    model is closer to the good model than the poor one was, that the
    R-factors parsed from ``<prefix>.log`` are in the expected range, and
    that the reported final R-work matches the one recomputed from the
    output model.
    """
    xrs_good, xrs_poor, f_obs, r_free_flags = run_tests.setup_helix_example()
    run_tests.run_cmd(prefix, args=["restraints=cctbx"])

    # Check results
    refined_path = os.path.join(prefix, "m00_poor_refined.pdb")
    xrs_refined = iotbx.pdb.input(
        file_name=refined_path).xray_structure_simple()
    d = flex.sqrt((xrs_good.sites_cart() - xrs_poor.sites_cart()).dot())
    assert flex.mean(d) > 0.10
    d = flex.sqrt((xrs_good.sites_cart() - xrs_refined.sites_cart()).dot())
    assert flex.mean(d) < 0.05

    # Check R-factors. The log is read inside a context manager so the
    # handle is closed even if parsing raises.
    r_start, r_final = None, None
    with open("%s.log" % prefix, "r") as log_file:
        for line in log_file:
            stripped = line.strip()
            if stripped.endswith("n_fev: 0"):
                r_start = float(line.split()[2])
            if stripped.startswith("Best r_work:"):
                r_final = float(line.split()[2])
    # Explicit None checks keep a missing log line an AssertionError on
    # Python 3 as well (None > float raises TypeError there).
    assert r_start is not None and r_start > 0.1
    assert r_final is not None and r_final < 0.04

    # Make sure output model actually corresponds to reported R-factor
    fmodel = mmtbx.f_model.manager(
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        xray_structure=xrs_refined)
    fmodel.update_all_scales()
    assert fmodel.r_work() < 0.04
    assert abs(r_final - fmodel.r_work()) < 0.0005, \
        abs(r_final - fmodel.r_work())
def target(self, vector):
    """Compute the functional for the current sd parameters.

    The sd parameters in ``vector`` are first applied to the input data,
    then the complete set of normalized deviations is computed, and finally
    those normalized deviations are used to compute the functional.

    ``a_data``, ``b_data``, ``a_sigmas`` and ``b_sigmas`` are not
    parameters; they are resolved from a scope outside this function that is
    not visible here.

    Returns the functional ``f``: the sum over 50 bins (ordered by mean
    value) of ``(1 - variance of the normalized deviations of a)**2``.
    """
    sdfac, sdb, sdadd = vector[0], 0.0, vector[1]
    a_new_variance, b_new_variance = ccp4_model.apply_sd_error_params(
        vector, a_data, b_data, a_sigmas, b_sigmas)

    # Inverse-variance weighted mean of the two measurements.
    mean_num = (a_data / (a_new_variance)) + (b_data / (b_new_variance))
    mean_den = (1. / (a_new_variance)) + (1. / (b_new_variance))
    mean_values = mean_num / mean_den

    delta_I_a = a_data - mean_values
    normal_a = delta_I_a / flex.sqrt(a_new_variance)
    delta_I_b = b_data - mean_values
    normal_b = delta_I_b / flex.sqrt(b_new_variance)

    mean_order = flex.sort_permutation(mean_values)
    n_bins = 50
    n_obs = len(mean_order)
    scatters = flex.double(n_bins)
    # scattersb is filled for the b data but does not enter the functional.
    scattersb = flex.double(n_bins)
    for isubsection in range(n_bins):
        start = isubsection * n_obs // n_bins
        stop = (isubsection + 1) * n_obs // n_bins
        subselect = mean_order[start:stop]
        vals = normal_a.select(subselect)
        scatters[isubsection] = flex.mean_and_variance(
            vals).unweighted_sample_variance()
        valsb = normal_b.select(subselect)
        scattersb[isubsection] = flex.mean_and_variance(
            valsb).unweighted_sample_variance()

    f = flex.sum(flex.pow(1. - scatters, 2))
    # Single pre-formatted argument: prints identically whether print is
    # the Python 2 statement or the Python 3 function.
    print("f: % 12.1f, sdfac: %8.5f, sdb: %8.5f, sdadd: %8.5f"
          % (f, sdfac, sdb, sdadd))
    return f
def test_linear2d_modeller(self):
    """Check that Linear2dModeller estimates scatter like a standard normal.

    1000 background shoeboxes are generated with parameters (ma, mb, mc).
    Every model is expected to carry 9 triples of parameters and variances;
    each triple is turned into Z-scores against the generating values and
    handed to ``self.assert_std_norm``.
    """
    from dials.algorithms.background.simple import Linear2dModeller
    from dials.array_family import flex

    modeller = Linear2dModeller()

    # Generate shoeboxes
    ma = 100
    mb = 1
    mc = 2
    sboxes, masks = self.generate_background(self.size, 1000, ma, mb, mc, 0)

    triple_params = []
    triple_variances = []
    for index in range(1000):
        model = modeller.create(sboxes[index], masks[index])
        assert len(model.params()) == 9 * 3
        assert len(model.variances()) == 9 * 3
        params = model.params()
        variances = model.variances()
        # Split the flat arrays into consecutive (a, b, c) triples.
        for start in range(0, 9 * 3, 3):
            triple_params.append(tuple(params[start:start + 3]))
            triple_variances.append(tuple(variances[start:start + 3]))
    a, b, c = zip(*triple_params)
    va, vb, vc = zip(*triple_variances)

    # Compute Z for each parameter
    za = (flex.double(a) - ma) / flex.sqrt(flex.double(va))
    zb = (flex.double(b) - mb) / flex.sqrt(flex.double(vb))
    zc = (flex.double(c) - mc) / flex.sqrt(flex.double(vc))

    # Check it looks standard normal
    for z_scores in (za, zb, zc):
        self.assert_std_norm(z_scores)
def target(self, vector):
    """Compute the functional for the current sd parameters.

    The sd parameters in ``vector`` are first applied to the input data,
    then the complete set of normalized deviations is computed, and finally
    those normalized deviations are used to compute the functional.

    ``a_data``, ``b_data``, ``a_sigmas`` and ``b_sigmas`` are not
    parameters; they are resolved from a scope outside this function that is
    not visible here.

    Returns the functional ``f``: the sum over 50 bins (ordered by mean
    value) of ``(1 - variance of the normalized deviations of a)**2``.
    """
    sdfac, sdb, sdadd = vector[0], 0.0, vector[1]
    a_new_variance, b_new_variance = ccp4_model.apply_sd_error_params(
        vector, a_data, b_data, a_sigmas, b_sigmas)

    # Inverse-variance weighted mean of the two measurements.
    mean_num = (a_data / (a_new_variance)) + (b_data / (b_new_variance))
    mean_den = (1. / (a_new_variance)) + (1. / (b_new_variance))
    mean_values = mean_num / mean_den

    delta_I_a = a_data - mean_values
    normal_a = delta_I_a / flex.sqrt(a_new_variance)
    delta_I_b = b_data - mean_values
    normal_b = delta_I_b / flex.sqrt(b_new_variance)

    mean_order = flex.sort_permutation(mean_values)
    n_bins = 50
    n_obs = len(mean_order)
    scatters = flex.double(n_bins)
    # scattersb is filled for the b data but does not enter the functional.
    scattersb = flex.double(n_bins)
    for isubsection in range(n_bins):
        start = isubsection * n_obs // n_bins
        stop = (isubsection + 1) * n_obs // n_bins
        subselect = mean_order[start:stop]
        vals = normal_a.select(subselect)
        scatters[isubsection] = flex.mean_and_variance(
            vals).unweighted_sample_variance()
        valsb = normal_b.select(subselect)
        scattersb[isubsection] = flex.mean_and_variance(
            valsb).unweighted_sample_variance()

    f = flex.sum(flex.pow(1. - scatters, 2))
    # Single pre-formatted argument: prints identically whether print is
    # the Python 2 statement or the Python 3 function.
    print("f: % 12.1f, sdfac: %8.5f, sdb: %8.5f, sdadd: %8.5f"
          % (f, sdfac, sdb, sdadd))
    return f
def log_frame(experiments, reflections, params, run, n_strong,
              timestamp=None, two_theta_low=None, two_theta_high=None):
    """Record one processed frame in the database.

    Creates an event for the given run and trial (adding the rungroup when
    ``params.input.rungroup`` is set). If ``experiments`` is given it must
    hold exactly one experiment; that experiment is stored, linked to the
    event, and one ``Cell_Bin`` is created per resolution bin of its crystal
    cell with the count and averages of the positive-intensity reflections
    in that bin.

    When ``params.input.trial`` is None it is filled in from the trial looked
    up by ``params.input.trial_id``.
    """
    app = dxtbx_xfel_db_application(params)
    db_run = app.get_run(run_number=run)

    if params.input.trial is None:
        db_trial = app.get_trial(trial_id=params.input.trial_id)
        params.input.trial = db_trial.trial
    else:
        db_trial = app.get_trial(trial_number=params.input.trial)

    # The rungroup id is only passed along when one was supplied.
    event_args = dict(
        timestamp=timestamp,
        run_id=db_run.id,
        trial_id=db_trial.id,
        n_strong=n_strong,
        two_theta_low=two_theta_low,
        two_theta_high=two_theta_high)
    if params.input.rungroup is not None:
        event_args["rungroup_id"] = params.input.rungroup
    db_event = app.create_event(**event_args)

    if experiments is None:
        return

    assert len(experiments) == 1
    db_experiment = app.create_experiment(experiments[0])
    app.link_imageset_frame(db_experiment.imageset, db_event)

    d = experiments[0].crystal.get_unit_cell().d(reflections['miller_index'])
    # bins will be [] if there are no isoforms
    for db_bin in db_experiment.crystal.cell.bins:
        sel = (d <= float(db_bin.d_max)) & (d > float(db_bin.d_min))
        sel &= reflections['intensity.sum.value'] > 0
        refls = reflections.select(sel)
        n_refls = len(refls)
        if n_refls > 0:
            values = refls['intensity.sum.value']
            sigmas = flex.sqrt(refls['intensity.sum.variance'])
            avg_intensity = flex.mean(values)
            avg_sigma = flex.mean(sigmas)
            avg_i_sigi = flex.mean(values / sigmas)
        else:
            avg_intensity = avg_sigma = avg_i_sigi = None
        Cell_Bin(app,
                 count=n_refls,
                 bin_id=db_bin.id,
                 crystal_id=db_experiment.crystal.id,
                 avg_intensity=avg_intensity,
                 avg_sigma=avg_sigma,
                 avg_i_sigi=avg_i_sigi)
def compute_weight(self, fmodel, rm):
    """Set ``self.data_weight`` from the ratio of gradient norms.

    Does nothing when ``self.weight_was_provided`` is true. Otherwise both
    the ``random`` module and flex random generators are seeded with 1, a
    deep copy of ``fmodel`` is (optionally) shaken, and the weight is the
    norm of the restraint gradients divided by the norm of the data
    gradients. In each gradient set, entries whose length exceeds six times
    the mean length are left out of the norm. If the data gradient norm is
    zero the weight falls back to 1.0.

    Args:
        fmodel: object providing ``deep_copy``, ``xray_structure``,
            ``update_xray_structure`` and ``target_functor``.
        rm: object providing ``target_and_gradients(sites_cart=...)``.
    """
    if (self.weight_was_provided):
        return
    random.seed(1)
    flex.set_random_seed(1)

    fmodel_dc = fmodel.deep_copy()
    xrs = fmodel_dc.xray_structure.deep_copy_scatterers()
    if (self.shake_sites):
        xrs.shake_sites_in_place(mean_distance=0.2)
    fmodel_dc.update_xray_structure(xray_structure=xrs, update_f_calc=True)

    x_target_functor = fmodel_dc.target_functor()
    tgx = x_target_functor(compute_gradients=True)
    gx = flex.vec3_double(
        tgx.gradients_wrt_atomic_parameters(site=True).packed())
    _, gc = rm.target_and_gradients(sites_cart=xrs.sites_cart())

    def filtered_norm(gradients):
        # Norm of the gradients after filtering out large contributions
        # (length greater than 6x the mean length).
        lengths = flex.sqrt(gradients.dot())
        too_large = lengths > flex.mean(lengths) * 6
        return gradients.select(~too_large).norm()

    y = filtered_norm(gx)
    x = filtered_norm(gc)

    if (y != 0.0):
        self.data_weight = x / y
    else:
        self.data_weight = 1.0  # ad hoc default fallback
def log_frame(experiments, reflections, params, run, n_strong,
              timestamp=None, two_theta_low=None, two_theta_high=None):
    """Log a processed frame (event, experiment and per-bin statistics).

    An event is created for the run/trial pair, with the rungroup attached
    only when ``params.input.rungroup`` is not None. When ``experiments`` is
    not None it must contain a single experiment, which is stored and linked
    to the event; a ``Cell_Bin`` row is then created for every bin of the
    stored crystal cell, summarising reflections with positive summed
    intensity whose d-spacing falls in ``(d_min, d_max]``.

    Side effect: ``params.input.trial`` is populated from the database when
    it was None on entry.
    """
    app = dxtbx_xfel_db_application(params)
    db_run = app.get_run(run_number=run)

    if params.input.trial is None:
        db_trial = app.get_trial(trial_id=params.input.trial_id)
        params.input.trial = db_trial.trial
    else:
        db_trial = app.get_trial(trial_number=params.input.trial)

    event_kwargs = {
        "timestamp": timestamp,
        "run_id": db_run.id,
        "trial_id": db_trial.id,
        "n_strong": n_strong,
        "two_theta_low": two_theta_low,
        "two_theta_high": two_theta_high,
    }
    if params.input.rungroup is not None:
        event_kwargs["rungroup_id"] = params.input.rungroup
    db_event = app.create_event(**event_kwargs)

    if experiments is None:
        return

    assert len(experiments) == 1
    experiment = experiments[0]
    db_experiment = app.create_experiment(experiment)
    app.link_imageset_frame(db_experiment.imageset, db_event)

    def bin_averages(refls):
        # (mean I, mean sigma, mean I/sigma) or Nones for an empty bin.
        if len(refls) > 0:
            intensity = refls['intensity.sum.value']
            sigma = flex.sqrt(refls['intensity.sum.variance'])
            return (flex.mean(intensity),
                    flex.mean(sigma),
                    flex.mean(intensity / sigma))
        return None, None, None

    d = experiment.crystal.get_unit_cell().d(reflections['miller_index'])
    # bins will be [] if there are no isoforms
    for db_bin in db_experiment.crystal.cell.bins:
        sel = (d <= float(db_bin.d_max)) & (d > float(db_bin.d_min))
        sel &= reflections['intensity.sum.value'] > 0
        refls = reflections.select(sel)
        avg_intensity, avg_sigma, avg_i_sigi = bin_averages(refls)
        Cell_Bin(
            app,
            count=len(refls),
            bin_id=db_bin.id,
            crystal_id=db_experiment.crystal.id,
            avg_intensity=avg_intensity,
            avg_sigma=avg_sigma,
            avg_i_sigi=avg_i_sigi)
def correct_sigmas(self, sd_fac, sd_b, sd_add):
    """Replace the sigmas of ``self.intensities`` with corrected values.

    The corrected sigma is

        sd' = SDfac * Sqrt(sd^2 + SdB * I + (SDadd * I)^2)

    where the quantity under the square root is not allowed to drop below
    ``MINVARINFRAC`` (0.1) times the original variance.

    Args:
        sd_fac: overall multiplier (SDfac).
        sd_b: coefficient of the term linear in intensity (SdB).
        sd_add: coefficient of the term quadratic in intensity (SDadd).
    """
    data = self.intensities.data()
    variance = flex.pow2(self.intensities.sigmas())
    si2 = flex.pow2(sd_add * data)
    ssc = variance + sd_b * data + si2

    # Floor the corrected variance at a fraction of the input variance.
    MINVARINFRAC = 0.1
    ssc.set_selected(ssc < MINVARINFRAC * variance, MINVARINFRAC * variance)

    sd = sd_fac * flex.sqrt(ssc)
    self.intensities = self.intensities.customized_copy(
        sigmas=sd, info=self.intensities.info())
def core(mode, result, t1, t2):
    """Run ``switch_rotamers`` in the given mode and check the coordinates.

    The input model (``pdb_str``) and the switched model are written to
    ``exercise_<mode>_in.pdb`` and ``exercise_<mode>_out.pdb``.

    Args:
        mode: mode passed to ``mmtbx.utils.switch_rotamers``.
        result: PDB lines of the expected outcome.
        t1: expected (min, max, mean) distance between expected and obtained
            coordinates, compared with tolerance 1.e-3.
        t2: expected (min, max, mean) distance between obtained and starting
            coordinates, compared with tolerance 0.1.
    """
    prefix = "exercise_%s" % mode

    def distance_stats(sites_a, sites_b):
        # (min, max, mean) of per-atom distances between two coordinate sets.
        return flex.sqrt((sites_a - sites_b).dot()).min_max_mean().as_tuple()

    hierarchy = iotbx.pdb.input(
        source_info=None, lines=pdb_str).construct_hierarchy()
    sites_start = hierarchy.atoms().extract_xyz()
    hierarchy.write_pdb_file(file_name="%s_in.pdb" % prefix)

    hierarchy = mmtbx.utils.switch_rotamers(pdb_hierarchy=hierarchy, mode=mode)
    hierarchy.write_pdb_file(file_name="%s_out.pdb" % prefix)

    sites_expected = iotbx.pdb.input(
        source_info=None, lines=result).atoms().extract_xyz()
    sites_obtained = hierarchy.atoms().extract_xyz()

    assert approx_equal(
        distance_stats(sites_expected, sites_obtained), t1, 1.e-3)
    assert approx_equal(
        distance_stats(sites_obtained, sites_start), t2, 0.1)
def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
    """Build a per-image-range plot of mean I/sigma(I) and flag blank ranges.

    Profile-fitted intensities are used when any reflection carries the
    ``integrated_prf`` flag; otherwise summation-integrated reflections are
    used. Reflections are histogrammed along the observed z (image)
    coordinate and the mean I/sigma is computed per slot (0 for empty
    slots). Slots whose mean is at or below ``fractional_loss`` times the
    largest slot mean are marked as potentially blank.

    Args:
        reflections: reflection table with intensity, variance and
            ``xyzobs.px.value`` columns.
        scan: object providing ``get_angle_from_array_index`` and
            ``get_oscillation``.
        phi_step: requested step width in the scan's angle units; it is
            rounded up to a whole number of images.
        fractional_loss: fraction of the maximum slot mean used as the blank
            threshold.

    Returns:
        dict with ``data`` and ``layout`` entries describing the bar plot and
        a ``blank_regions`` entry computed by ``blank_regions_from_sel``.
    """
    prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
    if prf_sel.count(True) > 0:
        reflections = reflections.select(prf_sel)
        intensities = reflections["intensity.prf.value"]
        variances = reflections["intensity.prf.variance"]
    else:
        sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
        reflections = reflections.select(sum_sel)
        intensities = reflections["intensity.sum.value"]
        variances = reflections["intensity.sum.variance"]

    i_sigi = intensities / flex.sqrt(variances)

    xyz_px = reflections["xyzobs.px.value"]
    x_px, y_px, z_px = xyz_px.parts()
    phi = scan.get_angle_from_array_index(z_px)

    # Round the step up to a whole number of images.
    osc = scan.get_oscillation()[1]
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc

    phi_min = flex.min(phi)
    phi_max = flex.max(phi)
    n_steps = iceil((phi_max - phi_min) / phi_step)

    hist = flex.histogram(z_px, n_slots=n_steps)

    mean_i_sigi = flex.double()
    for slot_info in hist.slot_infos():
        sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
        if sel.count(True) == 0:
            mean_i_sigi.append(0)
        else:
            mean_i_sigi.append(flex.mean(i_sigi.select(sel)))

    potential_blank_sel = mean_i_sigi <= (
        fractional_loss * flex.max(mean_i_sigi))

    xmin, xmax = zip(*[(slot_info.low_cutoff, slot_info.high_cutoff)
                       for slot_info in hist.slot_infos()])

    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(mean_i_sigi),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {
            "xaxis": {"title": "z observed (images)"},
            "yaxis": {"title": "Number of reflections"},
            "bargap": 0,
        },
    }

    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions

    return d
def show(self, sites_cart, n_slots_difference_histogram=6, out=None,
         prefix=""):
    """Write a summary of every NCS operator of this group to ``out``.

    For each operator the reference and other selection strings, the number
    of atom pairs, the rotation and translation (in Mathematica form), an
    optional histogram of coordinate differences and the RMS difference are
    written.

    Args:
        sites_cart: Cartesian coordinates the selection pairs index into.
        n_slots_difference_histogram: number of histogram slots; None
            suppresses the histogram.
        out: object with a ``write`` method; defaults to ``sys.stdout``.
        prefix: string prepended to every line.
    """
    if (out is None):
        out = sys.stdout

    def emit(*items):
        # Same text the Python 2 "print >> out, a, b" statement produced:
        # items converted with str(), joined by one space, newline appended.
        out.write(" ".join(str(item) for item in items) + "\n")

    selection_strings = self.group.selection_strings
    operators = zip(self.group.selection_pairs, self.matrices, self.rms)
    for i_op, (pair, mx, rms) in enumerate(operators, 1):
        emit(prefix + "NCS operator %d:" % i_op)
        emit(prefix + " Reference selection:",
             show_string(selection_strings[0]))
        emit(prefix + " Other selection:",
             show_string(selection_strings[i_op]))
        emit(prefix + " Number of atom pairs:", len(pair[0]))
        emit(mx.r.mathematica_form(
            label="Rotation",
            format="%.6g",
            one_row_per_line=True,
            prefix=prefix + " "))
        emit(mx.t.mathematica_form(
            label="Translation",
            format="%.6g",
            prefix=prefix + " "))
        if (n_slots_difference_histogram is not None):
            # Differences between reference sites and transformed copy sites;
            # only needed for the histogram.
            x = sites_cart.select(pair[0])
            y = mx * sites_cart.select(pair[1])
            d_sq = (x - y).dot()
            emit(prefix + " Histogram of differences:")
            diff_histogram = flex.histogram(
                data=flex.sqrt(d_sq),
                n_slots=n_slots_difference_histogram)
            diff_histogram.show(
                f=out,
                prefix=prefix + " ",
                format_cutoffs="%8.6f")
        emit(prefix
             + " RMS difference with respect to the reference: %8.6f" % (rms))
def lookup(self, coefs, codes, ntop):
    """Score every coefficient set and keep the ``ntop`` best hits.

    Each entry of ``coefs`` is truncated to its first ``self.nn_total``
    values and stored. When ``self.scan`` is true, ``gss`` is run on
    ``self.search`` for every stored coefficient set over the interval
    ``[max(self.rmax / 2, 1), 2 * self.rmax]``; the resulting
    ``self.this_rmax`` and ``self.this_score`` (presumably set by
    ``self.search`` -- not visible here) are collected, the top indices are
    chosen with ``self.tops`` and the intensities are plotted.

    NOTE(review): everything from the rmax scan to the plot is taken to be
    guarded by ``self.scan``, since it depends on ``self.scores`` which is
    only created there -- confirm against the original layout.

    Args:
        coefs: sequence of coefficient arrays.
        codes: identifiers parallel to ``coefs``.
        ntop: number of best hits to retain.
    """
    self.best_rmax = flex.double()
    self.best_codes = []
    self.coefs = [c[0:self.nn_total] for c in coefs]
    self.codes = codes
    self.ntop = ntop
    self.mean_ws = flex.sqrt(1.0 / flex.double(range(1, ntop + 1)))

    if not self.scan:
        return

    self.rmax_list = flex.double()
    self.top_hits = []
    self.scores = flex.double()
    self.rmax_max = self.rmax * 2.0
    self.rmax_min = max(self.rmax / 2.0, 1)

    for coef in self.coefs:
        self.this_coef = coef
        gss(self.search, self.rmax_min, self.rmax_max, eps=0.5, N=30)
        self.rmax_list.append(self.this_rmax)
        self.scores.append(self.this_score)

    top_indices = self.tops(self.scores, self.ntop)
    for hit in top_indices:
        self.best_rmax.append(self.rmax_list[hit])
        self.best_codes.append(codes[hit])
    self.best_indices = top_indices

    self.plot_intensity(self.rmax_list, top_indices, self.scores,
                        self.coefs, codes, qmax=0.5)
def q_range_analyses(self, rg, io, rat_lim=1.5, window_size=3, level=10.0,
                     sigma=False):
    """Find q ranges where the data deviate from I0 * exp(-rg^2 q^2 / 3).

    Only points with ``q * rg < rat_lim`` are considered. A per-point
    deviation is computed (see ``sigma``), averaged over a sliding window of
    ``window_size`` points, and consecutive windows whose mean exceeds
    ``level`` are merged into ``[q_start, q_end]`` ranges.

    Args:
        rg: radius of gyration used in the model.
        io: forward-scattering intensity; its logarithm is taken.
        rat_lim: upper limit on ``q * rg``.
        window_size: number of points in the sliding window.
        level: threshold on the windowed mean deviation.
        sigma: if true, the deviation is the square root of
            ``self.chi_square(...)``; otherwise it is the absolute
            difference from the model expressed in units of 1% of the
            model value.

    Returns:
        list of two-element lists ``[q_start, q_end]``.
    """
    selector = flex.bool(self.data.q * rg < rat_lim)
    tmp_q = self.data.q.select(selector)
    tmp_i = self.data.i.select(selector)
    tmp_s = self.data.s.select(selector)

    rg2 = rg * rg
    lni = math.log(io)
    if sigma:
        cs = self.chi_square(lni, rg2, tmp_q, tmp_i, tmp_s, False)
        cs = flex.sqrt(cs)
    else:
        cs = flex.exp(lni - rg2 * tmp_q * tmp_q / 3.0)
        ss = cs / 100.0
        cs = flex.abs(tmp_i - cs) / ss

    not_okai_ranges = []
    previous_one_was_bad = False
    tmp_range = []
    for ii in range(window_size, cs.size()):
        tmp_cs = flex.mean(cs[ii - window_size:ii])
        if tmp_cs > level:
            if not previous_one_was_bad:
                # Open a new range; start and end both begin at this q.
                tmp_range.append(tmp_q[ii])
                tmp_range.append(tmp_q[ii])
                previous_one_was_bad = True
            else:
                # Extend the open range.
                tmp_range[1] = tmp_q[ii]
        else:
            previous_one_was_bad = False
            if len(tmp_range) > 0:
                not_okai_ranges.append(tmp_range)
            tmp_range = []
    # NOTE(review): a range that is still open when the loop ends is not
    # appended, so a bad stretch reaching the last point is not reported --
    # confirm whether that is intended.
    return not_okai_ranges
def show(self, sites_cart, n_slots_difference_histogram=6, out=None,
         prefix=""):
    """Write a summary of every NCS operator of this group to ``out``.

    For each operator the reference and other selection strings, the number
    of atom pairs, the rotation and translation (in Mathematica form), an
    optional histogram of coordinate differences and the RMS difference are
    written.

    Args:
        sites_cart: Cartesian coordinates the selection pairs index into.
        n_slots_difference_histogram: number of histogram slots; None
            suppresses the histogram.
        out: object with a ``write`` method; defaults to ``sys.stdout``.
        prefix: string prepended to every line.
    """
    if (out is None):
        out = sys.stdout

    def emit(*items):
        # Same text the Python 2 "print >> out, a, b" statement produced:
        # items converted with str(), joined by one space, newline appended.
        out.write(" ".join(str(item) for item in items) + "\n")

    selection_strings = self.group.selection_strings
    operators = zip(self.group.selection_pairs, self.matrices, self.rms)
    for i_op, (pair, mx, rms) in enumerate(operators, 1):
        emit(prefix + "NCS operator %d:" % i_op)
        emit(prefix + " Reference selection:",
             show_string(selection_strings[0]))
        emit(prefix + " Other selection:",
             show_string(selection_strings[i_op]))
        emit(prefix + " Number of atom pairs:", len(pair[0]))
        emit(mx.r.mathematica_form(
            label="Rotation",
            format="%.6g",
            one_row_per_line=True,
            prefix=prefix + " "))
        emit(mx.t.mathematica_form(
            label="Translation",
            format="%.6g",
            prefix=prefix + " "))
        if (n_slots_difference_histogram is not None):
            # Differences between reference sites and transformed copy sites;
            # only needed for the histogram.
            x = sites_cart.select(pair[0])
            y = mx * sites_cart.select(pair[1])
            d_sq = (x - y).dot()
            emit(prefix + " Histogram of differences:")
            diff_histogram = flex.histogram(
                data=flex.sqrt(d_sq),
                n_slots=n_slots_difference_histogram)
            diff_histogram.show(
                f=out,
                prefix=prefix + " ",
                format_cutoffs="%8.6f")
        emit(prefix
             + " RMS difference with respect to the reference: %8.6f" % (rms))
def run(prefix, verbose=False):
    """Compare restraint gradients computed with and without expansion.

    For both altloc methods and every test file, restraints managers are
    built with ``expansion=False`` and ``expansion=True``; the percentage
    difference between the absolute values of matching gradient components
    must stay below 1.e-6.

    Args:
        prefix: not used by this function.
        verbose: if true, print progress markers and a per-atom table of
            gradient differences.
    """
    path = qr_unit_tests + "/data_files/"
    files = ["gly2_1.pdb", "altlocs2.pdb", "altlocs.pdb", "gly2_2.pdb"]
    for altloc_method in ["average", "subtract"]:
        for f in files:
            print(f)
            fn = path + f
            ph = iotbx.pdb.input(fn).construct_hierarchy()
            #
            if (verbose):
                print("expansion=False ")
            rm1, sites_cart = get_restraints_manager(
                expansion=False, file_name=fn, altloc_method=altloc_method)
            t1, g1 = rm1.target_and_gradients(sites_cart=sites_cart)
            #
            if (verbose):
                print("expansion=True ")
            rm2, sites_cart = get_restraints_manager(
                expansion=True, file_name=fn, altloc_method=altloc_method)
            t2, g2 = rm2.target_and_gradients(sites_cart=sites_cart)
            #
            if (verbose):
                atoms = ph.atoms()
                ds = flex.sqrt((g1 - g2).dot())
                for d, g, gg, dist, a in zip((g1 - g2), g1, g2, ds, atoms):
                    print(["%8.4f" % i for i in d],
                          ["%8.4f" % i for i in g],
                          ["%8.4f" % i for i in gg],
                          "%8.4f" % dist, a.quote())
            #
            rs = flex.double()
            for a, b in zip(g1.as_double(), g2.as_double()):
                denominator = abs(a) + abs(b)
                if denominator == 0:
                    # Both components are exactly zero: they agree, and the
                    # ratio below would otherwise divide by zero.
                    r = 0.
                else:
                    r = abs(abs(a) - abs(b)) / denominator * 2. * 100
                rs.append(r)
            assert flex.max(flex.abs(rs)) < 1.e-6
def get_pr_statistics(self):
    """Compute mean and sigma of P(r) and dmax over the selected trials.

    Trials are selected where the ``self.best_index`` row of
    ``self.dist_mat`` is below ``self.mcdf_mean``. Sets ``self.average_pr``,
    ``self.sigma_pr``, ``self.d_mean``, ``self.d_sigma`` and ``self.r``, and
    writes a two-line dmax summary to standard output and, appended, to
    ``self.outfile``.
    """
    sel = flex.bool(self.dist_mat[self.best_index] < self.mcdf_mean)
    self.average_pr = flex.double(int(self.d_array[-1]), 0)
    tmp_pr2 = self.average_pr * 0
    refined_d_array = flex.double()
    for ii in range(self.saved_trials):
        if (sel[ii]):
            self.average_pr = self.average_pr + self.prs[ii]
            tmp_pr2 = tmp_pr2 + flex.pow2(self.prs[ii])
            refined_d_array.append(self.d_array[ii])

    n_selected = refined_d_array.size()
    self.average_pr = self.average_pr / n_selected
    self.sigma_pr = flex.sqrt(
        (tmp_pr2 / n_selected - flex.pow2(self.average_pr)))
    self.d_mean = flex.mean(refined_d_array)
    self.d_sigma = math.sqrt(
        flex.mean(flex.pow2(refined_d_array - self.d_mean)))

    # Items are joined with single spaces, matching what the former
    # Python 2 print statements (the first ended with a trailing comma, so
    # it shared a line with the second) wrote.
    dmax_line = " ".join([
        "DMAX(based on k-s distance)=",
        "BEST DMAX = ",
        str(self.d_array[self.best_index]),
    ])
    mean_line = " ".join([
        "MEAN DMAX(sigma) = ",
        str(self.d_mean),
        "(" + str(self.d_sigma) + ")",
    ])
    print(dmax_line)
    print(mean_line)
    with open(self.outfile, "a") as log:
        log.write(dmax_line + "\n")
        log.write(mean_line + "\n")

    self.r = flex.double(range(self.average_pr.size()))
def run():
    """Print gradient differences with and without expansion per test file.

    Files whose hierarchy has alternative conformations are skipped. For the
    others, a per-atom table of gradients from both restraints managers is
    printed, followed by the min/max/mean of the percentage difference
    between the absolute values of matching gradient components.
    """
    path = qr_unit_tests + "/data_files/"
    files = [
        "m2_complete_box_large.pdb",
    ]
    for f in files:
        fn = path + f
        ph = iotbx.pdb.input(fn).construct_hierarchy()
        if list(ph.altloc_indices()) != ['']:
            continue
        #
        rm1, sites_cart = get_restraints_manager(expansion=False, file_name=fn)
        t1, g1 = rm1.target_and_gradients(sites_cart=sites_cart)
        #
        rm2, sites_cart = get_restraints_manager(expansion=True, file_name=fn)
        t2, g2 = rm2.target_and_gradients(sites_cart=sites_cart)
        #
        atoms = ph.atoms()
        ds = flex.sqrt((g1 - g2).dot())
        for d, g, gg, dist, a in zip((g1 - g2), g1, g2, ds, atoms):
            print(["%8.4f" % i for i in d],
                  ["%8.4f" % i for i in g],
                  ["%8.4f" % i for i in gg],
                  "%8.4f" % dist, a.quote())
        #
        rs = flex.double()
        for a, b in zip(g1.as_double(), g2.as_double()):
            denominator = abs(a) + abs(b)
            if denominator == 0:
                # Both components are exactly zero: they agree, and the
                # ratio below would otherwise divide by zero.
                r = 0.
            else:
                r = abs(abs(a) - abs(b)) / denominator * 2. * 100
            rs.append(r)
        print(f, "min/max/mean:", rs.min_max_mean().as_tuple())
def _do_outlier_rejection(self):
    """Add indices (w.r.t. the Ih_table data) to self._outlier_indices.

    A normalised deviation is computed for every observation from the
    weights, inverse scale factors and intensities; observations whose
    absolute deviation exceeds ``self._zmax`` have their ``loc_indices``
    appended to ``self._outlier_indices`` and their ``dataset_id`` appended
    to ``self._datasets``.
    """
    block = self._Ih_table_block
    intensity = block.intensities
    g = block.inverse_scale_factors
    w = self.weights

    # Per-group sums, expanded back to one value per observation.
    wgIsum = ((w * g * intensity) * block.h_index_matrix) \
        * block.h_expand_matrix
    wg2sum = ((w * g * g) * block.h_index_matrix) * block.h_expand_matrix

    # Guard against zero division errors - can happen due to rounding errors
    # or bad data giving g values that are very small. Such entries are set
    # to one for now and marked as outliers below. This only covers g near
    # zero; if w is zero an AssertionError is raised instead.
    zero_sel = wg2sum == 0.0
    wg2sum.set_selected(zero_sel, 1.0)
    assert w.all_gt(0)  # guard against division by zero

    residual = intensity - (g * wgIsum / wg2sum)
    spread = flex.sqrt((1.0 / w) + flex.pow2(g / wg2sum))
    norm_dev = residual / spread
    norm_dev.set_selected(zero_sel, 1000)  # to trigger rejection

    outliers = flex.abs(norm_dev) > self._zmax
    self._outlier_indices.extend(
        block.Ih_table["loc_indices"].select(outliers))
    self._datasets.extend(
        self._Ih_table_block.Ih_table["dataset_id"].select(outliers))
def check_result(result_file_name):
    """Compare coordinates in a result file with its reference answer.

    The reference is read from ``"answer_" + result_file_name``.

    Args:
        result_file_name: name of the PDB file to check.

    Returns:
        (min, max, mean) tuple of per-atom distances between the two files;
        the tuple is also printed.
    """
    s1 = iotbx.pdb.input(file_name=result_file_name).atoms().extract_xyz()
    s2 = iotbx.pdb.input(
        file_name="answer_" + result_file_name).atoms().extract_xyz()
    r = flex.sqrt((s1 - s2).dot()).min_max_mean().as_tuple()
    # Single argument: same output as a Python 2 print statement and as the
    # Python 3 print function.
    print(r)
    return r
def compute_functional_and_gradients(self):
    """Return the target value and its gradients for parameters ``self.x``.

    The distribution's parameters are set from ``self.x`` and its cdf is
    evaluated at ``self.x_data``. When the distribution advertises
    ``interface == "C++"`` the cdf and gradients are evaluated on whole
    arrays; otherwise they are evaluated point by point.

    Returns:
        (f, gradients) where ``f`` is the sum over data points of
        ``sqrt(difference**2)`` with ``difference = predicted - y_data``.
    """
    # calculate difference between predicted and observed values
    self.distribution.set_parameters(p=self.x)
    is_cpp_ = getattr(self.distribution, "interface", "Python") == "C++"
    if is_cpp_:
        predicted = self.distribution.cdf(x=self.x_data)
    else:
        predicted = flex.double(self.n)
        for i in range(self.n):
            predicted[i] = self.distribution.cdf(x=self.x_data[i])
    difference = predicted - self.y_data

    # target function for minimization is sum of rmsd
    f = flex.sum(flex.sqrt(difference * difference))
    if is_cpp_:
        gradients = self.distribution.gradients(
            x=self.x_data, nparams=len(self.x), difference=difference)
        return f, gradients

    # NOTE(review): the gradients accumulated here are 2 * difference * dcdf,
    # i.e. those of sum(difference**2), while f above is sum(|difference|) --
    # confirm this pairing is intended.
    n_params = len(self.x)
    gradients = flex.double(n_params)
    for i in range(self.n):
        g_i = self.distribution.cdf_gradients(x=self.x_data[i])
        for j in range(n_params):
            gradients[j] = gradients[j] + difference[i] * g_i[j]
    gradients = 2.0 * gradients
    return f, gradients
def evaluate(self, m):
    """Calculate the z-map of ``m`` relative to the mean and std map.

    The mean map (``self.mu``) is subtracted from ``m`` and the result is
    divided by a normalisation chosen by ``self.method``:

    * ``'none'``: 1.0
    * ``'adjusted'``: ``self.sigma_adjusted``
    * ``'uncertainty'``: the uncertainty stored for ``m.meta.tag``
    * ``'adjusted+uncertainty'``: both combined in quadrature

    Raises:
        AssertionError: if the method is not one of the above, or it needs
            an uncertainty and none is stored for this map.
    """
    method = self.method
    assert method in [
        'none', 'adjusted', 'uncertainty', 'adjusted+uncertainty'
    ]
    uncertainty = self.sigma_uncertainty[m.meta.tag]
    # Check that a value has been found/supplied
    if 'uncertainty' in method:
        assert uncertainty is not None

    # Extract maps in the right sparseness
    is_sparse = m.is_sparse()
    # Mean values (for subtraction)
    comp_vals = self.mu.get_map_data(sparse=is_sparse)

    # Normalisation values (for division)
    if method == 'none':
        norm_vals = 1.0
    elif method == 'adjusted':
        norm_vals = self.sigma_adjusted.get_map_data(sparse=is_sparse)
    elif method == 'uncertainty':
        norm_vals = uncertainty
    elif method == 'adjusted+uncertainty':
        adjusted = self.sigma_adjusted.get_map_data(sparse=is_sparse)
        norm_vals = flex.sqrt(adjusted**2 + uncertainty**2)
    else:
        raise Exception('method not found: {!s}'.format(self.method))

    scale = 1.0 / norm_vals
    return (m - comp_vals) * scale
def set_k_isotropic_exp(self, r_start, use_scale_r, verbose,
                        b_lower_limit=-100):
    """Try an exponential isotropic scale and keep it if it lowers R.

    ``scitbx.math.gaussian_fit_1d_analytical`` is run on the work-set
    reflections against ``sqrt(self.ss)``. The resulting scale
    ``a * exp(-ss * b)`` is tested with ``self.try_scale`` and applied to
    ``self.core`` only when the trial R is below ``r_start``.

    Args:
        r_start: current R value to beat.
        use_scale_r: forwarded to ``self.try_scale``.
        verbose: if true, report the trial R to ``self.log``
            (``sys.stdout`` when ``self.log`` is None).
        b_lower_limit: fits with ``b`` below this value are rejected
            without trial.

    Returns:
        ``self.r_factor()`` after an accepted update, otherwise ``r_start``
        unchanged.
    """
    arrays = mmtbx.arrays.init(
        f_calc=self.core.f_calc,
        f_masks=self.core.f_mask(),
        k_isotropic=self.core.k_isotropic,
        k_anisotropic=self.core.k_anisotropic,
        k_masks=self.core.k_mask())
    sel = self.selection_work.data()
    rf = scitbx.math.gaussian_fit_1d_analytical(
        x=flex.sqrt(self.ss).select(sel),
        y=self.f_obs.data().select(sel),
        z=abs(arrays.f_model).data().select(sel))
    if (rf.b < b_lower_limit):
        return r_start

    k = rf.a * flex.exp(-self.ss * rf.b)
    r = self.try_scale(k_isotropic_exp=k, use_scale_r=use_scale_r)
    accepted = r < r_start

    if (verbose):
        # Written with .write() so the method works on Python 2 and 3; a
        # None log goes to stdout, as "print >> None" did.
        log = self.log
        if (log is None):
            import sys
            log = sys.stdout
        if (accepted):
            log.write(" r(set_k_isotropic_exp): %6.4f" % r + "\n")
        else:
            log.write(" r(set_k_isotropic_exp): %6.4f (rejected)" % r + "\n")

    if (accepted):
        self.core = self.core.update(k_isotropic_exp=k)
        r_start = self.r_factor()
    return r_start
def calculate_esds(self):
    """Calculate ESDs of parameters.

    Stores the parameter variance-covariance matrix in
    ``self.parameter_var_cov``, passes it to the models and passes the
    square roots of its diagonal to the parameter classes. Returns None.
    """
    # It is possible to get here with zero steps taken by the minimiser
    # (for example by failing for the MAX_TRIAL_ITERATIONS reason before any
    # forward steps are taken with the LevMar engine). The calculation below
    # is then invalid, so return early. The same applies without a
    # factorisation.
    if self.history.get_nrows() == 0 or self.cf is None:
        return None

    # Invert the normal matrix from N^-1 = (U^-1)(U^-1)^T
    upper = self.cf.matrix_packed_u_as_upper_triangle()
    cf_inv = upper.matrix_inversion()
    nm_inv = cf_inv.matrix_multiply_transpose(cf_inv)

    # Keep the estimated parameter variance-covariance matrix
    reduced_chi_squared = self.history["reduced_chi_squared"][-1]
    self.parameter_var_cov = reduced_chi_squared * nm_inv

    # Send this back to the models to calculate their uncertainties
    self._parameters.calculate_model_state_uncertainties(
        self.parameter_var_cov)

    # Send parameter variances back to the parameter classes themselves, for
    # reporting purposes and for building restraints based on existing
    # parameterisations.
    variances = self.parameter_var_cov.matrix_diagonal()
    assert variances.all_ge(0.0)
    esds = flex.sqrt(variances)
    self._parameters.set_param_esds(esds)

    return
def _find_nearest_neighbours(self, observed, predicted):
    '''Find the nearest predicted spot to each observed spot.

    Params:
        observed The observed reflections
        predicted The predicted reflections

    Returns:
        (nearest neighbours, distance), where distance is the square root
        of the values reported by the adaptor's ``distances``.

    '''
    from annlib_ext import AnnAdaptor
    from scitbx.array_family import flex

    # Get the predicted coordinates as (x, y, frame) triples
    predicted_xyz = []
    for r in predicted:
        x, y = r.image_coord_px
        predicted_xyz.append((x, y, r.frame_number))

    # Create the KD Tree
    ann = AnnAdaptor(flex.double(predicted_xyz).as_1d(), 3)

    # Get the observed coordinates
    observed_xyz = [r.centroid_position for r in observed]

    # Query to find all the nearest neighbours
    ann.query(flex.double(observed_xyz).as_1d())

    # Return the nearest neighbours and distances
    return ann.nn, flex.sqrt(ann.distances)
def _do_outlier_rejection(self):
    """Add indices (w.r.t. the Ih_table data) to self._outlier_indices.

    Every observation whose asu Miller index is present in the target table
    is given the target's Ih value and variance (stored in the
    ``target_Ih_value`` and ``target_Ih_sigmasq`` columns). Observations
    with a non-zero target value are then tested: those whose normalised
    deviation from the scaled target exceeds ``self._zmax`` in absolute
    value have their ``loc_indices`` and ``dataset_id`` appended to
    ``self._outlier_indices`` and ``self._datasets``.
    """
    block = self._Ih_table_block
    target = self._target_Ih_table_block

    # asu Miller index -> (Ih value, variance) of the target.
    target_asu_Ih_dict = dict(
        zip(target.asu_miller_index,
            zip(target.Ih_values, target.variances)))

    block.Ih_table["target_Ih_value"] = flex.double(block.size, 0.0)
    block.Ih_table["target_Ih_sigmasq"] = flex.double(block.size, 0.0)
    for j, miller_idx in enumerate(block.asu_miller_index):
        if miller_idx not in target_asu_Ih_dict:
            continue
        target_value, target_variance = target_asu_Ih_dict[miller_idx]
        block.Ih_table["target_Ih_value"][j] = target_value
        block.Ih_table["target_Ih_sigmasq"][j] = target_variance

    # Only observations with a (non-zero) target value are tested.
    nz_sel = block.Ih_table["target_Ih_value"] != 0.0
    matched = block.select(nz_sel)

    scales = matched.inverse_scale_factors
    residual = matched.intensities - (
        scales * matched.Ih_table["target_Ih_value"])
    spread = flex.sqrt(
        matched.variances
        + (flex.pow2(scales) * matched.Ih_table["target_Ih_sigmasq"]))
    norm_dev = residual / spread

    # Map outliers found in the reduced table back onto the full block.
    outliers_sel = flex.abs(norm_dev) > self._zmax
    outliers_isel = nz_sel.iselection().select(outliers_sel)
    outliers = flex.bool(self._Ih_table_block.size, False)
    outliers.set_selected(outliers_isel, True)

    full_table = self._Ih_table_block.Ih_table
    self._outlier_indices.extend(full_table["loc_indices"].select(outliers))
    self._datasets.extend(full_table["dataset_id"].select(outliers))
def __init__(self, xray_structure, k_anisotropic, k_masks, ss):
    """Fit ``k * exp(-b * ss)`` to k_anisotropic and shift B into the model.

    The fit is seeded by ``scitbx.math.gaussian_fit_1d_analytical`` and
    refined by ``mmtbx.bulk_solvent.fit_k_exp_b_to_k_total``. It is used
    only when the returned residual ``r`` is below 0.7, ``k`` is non-zero
    and an xray structure is present. In that case the atomic displacement
    parameters are shifted (``shift_us``) by the largest part of ``b`` that
    keeps the smallest B non-negative, ``self.k_anisotropic`` is divided by
    the corresponding exponential and every entry of ``self.k_masks`` is
    multiplied by ``exp(-ss * b_adj)``.

    Args:
        xray_structure: structure whose ADPs are shifted in place; may be
            None, in which case only the attributes are stored.
        k_anisotropic: anisotropic scale array.
        k_masks: list of mask scale arrays.
        ss: array passed to the fits and used in the exponentials.
    """
    self.xray_structure = xray_structure
    self.k_anisotropic = k_anisotropic
    self.k_masks = k_masks
    self.ss = ss

    k_total = self.k_anisotropic
    initial = scitbx.math.gaussian_fit_1d_analytical(
        x=flex.sqrt(self.ss), y=k_total)
    k, b = initial.a, initial.b
    k, b, r = mmtbx.bulk_solvent.fit_k_exp_b_to_k_total(
        k_total, self.ss, k, b)

    # The fit is only trusted below this residual.
    k_exp_overall, b_exp_overall = None, None
    if (r < 0.7):
        k_exp_overall, b_exp_overall = k, b

    if (self.xray_structure is None):
        return None

    b_adj = 0
    fit_available = [k_exp_overall, b_exp_overall].count(None) == 0
    if (fit_available and k != 0):
        structure = self.xray_structure
        bs1 = structure.extract_u_iso_or_u_equiv() * adptbx.u_as_b(1.)

        # Split b into the part that can be moved into the atoms (b_adj)
        # and the remainder (b_res) that would push the smallest B below
        # zero.
        b_min = structure.min_u_cart_eigenvalue() * adptbx.u_as_b(1.)
        b_res = min(0, b_min + b_exp_overall + 1.e-6)
        b_adj = b_exp_overall - b_res
        structure.shift_us(b_shift=b_adj)

        k_new = k_exp_overall * flex.exp(-self.ss * b_adj)

        # Every atom must have moved by exactly b_adj.
        bs2 = self.xray_structure.extract_u_iso_or_u_equiv() \
            * adptbx.u_as_b(1.)
        diff = bs2 - bs1
        assert approx_equal(flex.min(diff), flex.max(diff))
        assert approx_equal(flex.max(diff), b_adj)

        self.k_anisotropic = self.k_anisotropic / k_new
        self.k_masks = [
            m * flex.exp(-self.ss * b_adj) for m in self.k_masks]
def _update(self, sites_cart, threshold=0.1):
    """Re-run the expansion when coordinates have moved far enough.

    The largest per-atom distance between ``sites_cart`` and
    ``self.sites_cart_previous`` is compared with ``threshold``; when it is
    exceeded the hierarchy coordinates are replaced, ``self._expand()`` is
    called and ``sites_cart`` becomes the new reference.

    NOTE(review): the reference coordinates are taken to be refreshed only
    when the expansion is redone -- confirm against the original layout.

    Args:
        sites_cart: current Cartesian coordinates.
        threshold: maximum tolerated shift before re-expanding.
    """
    displacement = sites_cart - self.sites_cart_previous
    shift_max = flex.max(flex.sqrt(displacement.dot()))
    if (shift_max <= threshold):
        return
    self.pdb_hierarchy.atoms().set_xyz(sites_cart)
    self._expand()
    self.sites_cart_previous = sites_cart
def set_k_isotropic_exp(self, r_start, use_scale_r, verbose,
                        b_lower_limit=-100):
    """Try an exponential isotropic scale and keep it if it lowers R.

    ``scitbx.math.gaussian_fit_1d_analytical`` is run on the work-set
    reflections against ``sqrt(self.ss)``. The resulting scale
    ``a * exp(-ss * b)`` is tested with ``self.try_scale`` and applied to
    ``self.core`` only when the trial R is below ``r_start``.

    Args:
        r_start: current R value to beat.
        use_scale_r: forwarded to ``self.try_scale``.
        verbose: if true, report the trial R to ``self.log``
            (``sys.stdout`` when ``self.log`` is None).
        b_lower_limit: fits with ``b`` below this value are rejected
            without trial.

    Returns:
        ``self.r_factor()`` after an accepted update, otherwise ``r_start``
        unchanged.
    """
    arrays = mmtbx.arrays.init(
        f_calc=self.core.f_calc,
        f_masks=self.core.f_mask(),
        k_isotropic=self.core.k_isotropic,
        k_anisotropic=self.core.k_anisotropic,
        k_masks=self.core.k_mask())
    sel = self.selection_work.data()
    rf = scitbx.math.gaussian_fit_1d_analytical(
        x=flex.sqrt(self.ss).select(sel),
        y=self.f_obs.data().select(sel),
        z=abs(arrays.f_model).data().select(sel))
    if (rf.b < b_lower_limit):
        return r_start

    k = rf.a * flex.exp(-self.ss * rf.b)
    r = self.try_scale(k_isotropic_exp=k, use_scale_r=use_scale_r)
    accepted = r < r_start

    if (verbose):
        # Written with .write() so the method works on Python 2 and 3; a
        # None log goes to stdout, as "print >> None" did.
        log = self.log
        if (log is None):
            import sys
            log = sys.stdout
        if (accepted):
            log.write(" r(set_k_isotropic_exp): %6.4f" % r + "\n")
        else:
            log.write(" r(set_k_isotropic_exp): %6.4f (rejected)" % r + "\n")

    if (accepted):
        self.core = self.core.update(k_isotropic_exp=k)
        r_start = self.r_factor()
    return r_start
def update(self, xray_structure=None, sites_cart=None):
    """Push new coordinates into the model and refresh the statistics.

    At most one of the two arguments may be given. With ``xray_structure``
    the model's xray structure is replaced; with ``sites_cart`` only its
    coordinates are; with neither, the model's current coordinates are
    used. The mean per-atom distance from the starting coordinates and from
    the previously recorded coordinates is stored, ``self.initialize()`` is
    called and the coordinates are added to ``self.states_collector``.
    """
    assert [xray_structure, sites_cart].count(None) != 0
    start_sites = self.sites_cart_start

    if (xray_structure is not None):
        current_sites = xray_structure.sites_cart()
        self.model.set_xray_structure(xray_structure=xray_structure)
    elif (sites_cart is not None):
        current_sites = sites_cart
        self.model.set_sites_cart(sites_cart=current_sites)
    else:
        current_sites = self.model.get_sites_cart()

    def mean_distance_to(reference):
        # Mean per-atom distance between reference and current coordinates.
        return flex.mean(flex.sqrt((reference - current_sites).dot()))

    self.dist_from_start = mean_distance_to(start_sites)
    self.dist_from_previous = mean_distance_to(self.s1)
    self.initialize()
    self.states_collector.add(sites_cart=current_sites)
    self.s1 = self.model.get_sites_cart()  # must be last
def score_by_rmsd_xy(self, reverse=False):
    """Score solutions by their combined x/y rmsd (smaller rmsds = better).

    The score of a solution is log2 of ``sqrt(rmsd_x**2 + rmsd_y**2)``,
    shifted so that the best solution scores 0, and multiplied by
    ``self.rmsd_weight``.

    Args:
        reverse: not used by this method.
    """
    all_rmsds = flex.vec3_double(s.rmsds for s in self.all_solutions)
    rmsd_x, rmsd_y, rmsd_z = all_rmsds.parts()
    rmsd_xy = flex.sqrt(flex.pow2(rmsd_x) + flex.pow2(rmsd_y))
    log2_rmsd = flex.log(rmsd_xy) / math.log(2)
    relative = log2_rmsd - flex.min(log2_rmsd)
    return self.rmsd_weight * relative
def calculate_esds(self):
    """Calculate ESDs of parameters.

    The inverse normal matrix, scaled by the last reduced chi-squared in the
    history, is kept as ``self.parameter_var_cov``; it is forwarded to the
    models and the square roots of its diagonal are forwarded to the
    parameter classes. Always returns None.
    """
    # With zero steps taken by the minimiser (e.g. failing for the
    # MAX_TRIAL_ITERATIONS reason before any forward step of the LevMar
    # engine) what follows is invalid, so return early.
    no_steps_taken = self.history.get_nrows() == 0
    if no_steps_taken:
        return None
    if self.cf is None:
        return None

    # Invert normal matrix from N^-1 = (U^-1)(U^-1)^T
    u_inverse = self.cf.matrix_packed_u_as_upper_triangle().matrix_inversion()
    normal_matrix_inverse = u_inverse.matrix_multiply_transpose(u_inverse)

    # Keep the estimated parameter variance-covariance matrix
    last_reduced_chi_squared = self.history["reduced_chi_squared"][-1]
    self.parameter_var_cov = last_reduced_chi_squared * normal_matrix_inverse

    # Models compute their own uncertainties from the full matrix
    self._parameters.calculate_model_state_uncertainties(
        self.parameter_var_cov)

    # Parameter classes get the ESDs, for reporting purposes and for
    # building restraints based on existing parameterisations.
    diagonal = self.parameter_var_cov.matrix_diagonal()
    assert diagonal.all_ge(0.0)
    self._parameters.set_param_esds(flex.sqrt(diagonal))

    return
def exercise_selection(prefix="exercise_selection"):
    """Run switch_rotamers with a selection and check that no site moved.

    Writes ``<prefix>_in.pdb`` and ``<prefix>_out.pdb`` as a side effect.
    """
    hierarchy = iotbx.pdb.input(
        source_info=None, lines=pdb_str).construct_hierarchy()
    sel = hierarchy.atom_selection_cache().selection(
        string="not resname TYR")
    s0 = hierarchy.atoms().extract_xyz()
    hierarchy.write_pdb_file(file_name="%s_in.pdb" % prefix)
    hierarchy = mmtbx.utils.switch_rotamers(
        pdb_hierarchy=hierarchy,
        mode="fix_outliers",
        selection=sel)
    hierarchy.write_pdb_file(file_name="%s_out.pdb" % prefix)
    s1 = iotbx.pdb.input(
        source_info=None, lines=selection).atoms().extract_xyz()
    s2 = hierarchy.atoms().extract_xyz()
    # assert fixed do not move: every pair of coordinate sets must coincide
    for sites_a, sites_b in ((s1, s2), (s1, s0), (s2, s0)):
        d = flex.sqrt((sites_a - sites_b).dot()).min_max_mean().as_tuple()
        assert approx_equal(d, [0, 0, 0])
def compute_per_model(h1, h2, log):
    """Print min/max/mean inter-atomic distances for each pair of models.

    Nothing is printed when ``h1`` holds exactly one model.
    """
    if len(h1.models()) == 1:
        return
    print >> log, "Per model (min/max/mean):"
    for model_1, model_2 in zip(h1.models(), h2.models()):
        xyz_1 = model_1.atoms().extract_xyz()
        xyz_2 = model_2.atoms().extract_xyz()
        stats = flex.sqrt((xyz_1 - xyz_2).dot()).min_max_mean().as_tuple()
        print >> log, model_1.id, ": %-8.3f %-8.3f %-8.3f" % stats
    # blank separator line after the table
    print >> log
def calc_score(atom_list, ref_list):
    """Return the alignment score: the sum of the distances between the
    coordinates of ``atom_list`` and the matching entries of ``ref_list``."""
    displacements = atom_list.extract_xyz() - ref_list
    distances = flex.sqrt(displacements.dot())
    return np.sum(distances.as_numpy_array())
def get_indexing_offset_correlation_coefficients(
    reflections, crystal, grid_search_scope, d_min=None, d_max=None, map_to_asu=False
):
    """Correlate I/sigma with itself after offsetting and inverting the indices.

    For every (h, k, l) on the cubic grid ``[-g, g]**3`` (``g`` being
    ``grid_search_scope``) the Miller indices are offset by
    ``(2h, 2k, 2l)``, transformed with ``-x,-y,-z`` and correlated against
    the unmodified data over their common set of indices.

    Args:
        reflections: table providing "intensity.sum.value",
            "intensity.sum.variance" and "miller_index" columns.
        crystal: object providing ``get_space_group()`` and ``get_unit_cell()``.
        grid_search_scope: half-width ``g`` of the offset grid.
        d_min, d_max: optional resolution limits applied to the data.
        map_to_asu: map the reference indices to the asymmetric unit.

    Returns:
        (offsets, ccs, nref): the grid offsets, the correlation coefficient
        at each offset and the number of common reflections it is based on.
    """
    from cctbx.crystal import symmetry as crystal_symmetry
    from cctbx.miller import set as miller_set
    from dials.array_family import flex

    space_group = crystal.get_space_group()
    unit_cell = crystal.get_unit_cell()
    cs = crystal_symmetry(unit_cell, space_group.type().lookup_symbol())

    data = reflections["intensity.sum.value"] / flex.sqrt(
        reflections["intensity.sum.variance"]
    )

    ccs = flex.double()
    offsets = flex.vec3_int()
    nref = flex.size_t()

    ms = miller_set(cs, reflections["miller_index"]).array(data)
    if d_min is not None or d_max is not None:
        ms = ms.resolution_filter(d_min=d_min, d_max=d_max)
    if map_to_asu:
        ms = ms.map_to_asu()

    # The reindexed array must carry the data that belongs to ms.indices().
    # After a resolution filter the unfiltered ``data`` no longer lines up
    # with those indices, so take the values from ``ms`` itself.
    reference_data = ms.data()

    # The operator does not depend on the grid position: build it once.
    inversion_op = sgtbx.change_of_basis_op("-x,-y,-z")

    g = grid_search_scope
    for h in range(-g, g + 1):
        for k in range(-g, g + 1):
            for l in range(-g, g + 1):
                # hkl offset doubled as equivalent of h0 + 1, hI - 1
                shifted_indices = offset_miller_indices(
                    ms.indices(), (2 * h, 2 * k, 2 * l)
                )
                reindexed_indices = inversion_op.apply(shifted_indices)
                rms = miller_set(cs, reindexed_indices).array(reference_data)
                intensity, intensity_rdx = rms.common_sets(ms)
                cc = intensity.correlation(intensity_rdx).coefficient()
                ccs.append(cc)
                offsets.append((h, k, l))
                nref.append(intensity.size())

    return offsets, ccs, nref
def run(): # Must run in cryo-EM folder! root = os.getcwd() folders = ["6i5a", "3j63", "5fn5_6iyc_A/c", "3a5x", "5ly6"] for folder in folders: folder_1 = "/".join([root, folder]) print folder_1, "-" * (79 - len(folder_1)) if (not os.path.isdir(folder_1)): print " ...does not exist, skip." continue # map_file = None for fi in os.listdir(folder_1): if (fi.endswith(".map") or fi.endswith(".ccp4")): map_file = "/".join([folder_1, fi]) assert os.path.isfile(map_file) # start = "/".join([folder_1, "initial.pdb"]) assert os.path.isfile(start) z1 = get_z(start) emr1 = get_emringer(start, map_file) model_1, mes1 = show_geo(start, prefix=" start:") print mes1, "z_score:", z1, "EMRinger: %4.2f" % emr1 # for sub_ in os.listdir(folder_1): sub = "/".join([folder_1, sub_]) if (not os.path.isdir(sub)): continue if "opt" in sub: continue refined = "/".join([sub, "real_space_refined.pdb"]) msg = "" if (not os.path.isfile(refined)): msg = "<<< WARNING: no final refined model." print " ", sub_, msg sub1 = "/".join([sub, "pdb"]) if (not os.path.isdir(sub1)): print " not there:", sub1 continue files = [] for fn in os.listdir(sub1): if (not fn.endswith(".pdb")): continue files.append(fn) refined = get_latest(files=files) if (refined is None): print " No refined model." continue refined = "/".join([sub1, refined]) assert os.path.isfile(refined) # model_2, mes2 = show_geo(refined, prefix=" final:") z2 = get_z(refined) emr2 = get_emringer(refined, map_file) s1 = model_1.get_sites_cart() s2 = model_2.get_sites_cart() dist = flex.sqrt((s1 - s2).dot()) print mes2, "min/max/mean(start, final): %5.3f %5.3f %5.3f"%\ dist.min_max_mean().as_tuple(), "z_score:", z2, "EMRinger: %4.2f"%emr2
def compute_per_atom(h1, h2, log):
    """Print the distance between each pair of matching atoms.

    Nothing is printed when ``h1`` holds exactly one atom.
    """
    atoms_1 = list(h1.atoms())
    atoms_2 = list(h2.atoms())
    if len(atoms_1) == 1:
        return
    print >> log, "Per atom:"
    for atom_1, atom_2 in zip(atoms_1, atoms_2):
        delta = flex.vec3_double([atom_1.xyz]) - flex.vec3_double([atom_2.xyz])
        distance = flex.sqrt(delta.dot())[0]
        print >> log, atom_1.format_atom_record()[:30], ": %-8.3f" % distance
def from_dials_data_files(cls, params, experiments, reflection_table):
    """Build an instance from an experiment list and a reflection table.

    Args:
        params: A damage-analysis phil params object.
        experiments: An ExperimentList.
        reflection_table: A reflection table.

    Returns:
        ``cls(intensities, doses, params)`` where ``intensities`` is the
        scaled-intensity miller array and ``doses`` the integer dose assigned
        to each reflection.
    """
    reflection_table = filter_reflection_table(
        reflection_table, intensity_choice=["scale"], partiality_threshold=0.4
    )

    # get scaled intensities
    symmetry = crystal.symmetry(
        unit_cell=median_unit_cell(experiments),
        space_group=experiments[0].crystal.get_space_group(),
    )
    indexed_set = miller.set(
        symmetry,
        indices=reflection_table["miller_index"],
        anomalous_flag=params.anomalous,
    )
    intensities = miller.array(
        indexed_set,
        data=reflection_table["intensity.scale.value"],
        sigmas=flex.sqrt(reflection_table["intensity.scale.variance"]),
    )
    intensities.set_observation_type_xray_intensity()

    doses = flex.double()
    start_doses, doses_per_image = interpret_images_to_doses_options(
        experiments,
        params.dose.experiments.dose_per_image,
        params.dose.experiments.starting_doses,
        params.dose.experiments.shared_crystal,
    )

    def _summarise(values):
        # list every value unless they are all identical
        if len(set(values)) > 1:
            return ", ".join("%s" % i for i in values)
        return " all %s" % str(values[0])

    logger.info(
        "Interpreting data using:\n starting_doses=%s\n dose_per_image=%s",
        _summarise(start_doses),
        _summarise(doses_per_image),
    )

    for expt, starting_dose, dose_per_img in zip(
        experiments, start_doses, doses_per_image
    ):
        refls = reflection_table.select(expt)
        # third component of the observed pixel position, rounded up
        imgno = flex.ceil(refls["xyzobs.px.value"].parts()[2])
        doses.extend((imgno * dose_per_img) + starting_dose)

    return cls(intensities, doses.iround(), params)
def do_scale_shifts(self, max_shift_over_esd):
    """Test convergence and damp the current step if it is too large.

    Returns True when the largest |shift / esd| is below
    ``self.convergence_as_shift_over_esd``. Otherwise returns False, after
    scaling the step in place so that its largest |shift / esd| does not
    exceed ``max_shift_over_esd``.
    """
    step = self.non_linear_ls.step()
    variances = self.non_linear_ls.covariance_matrix().matrix_packed_u_diagonal()
    largest_ratio = flex.max(flex.abs(step / flex.sqrt(variances)))
    if largest_ratio < self.convergence_as_shift_over_esd:
        return True
    if largest_ratio > max_shift_over_esd:
        # in-place scaling, as in the original, so the owner of the step
        # array sees the damped shifts
        step *= max_shift_over_esd / largest_ratio
    return False
def tst_linear3d_modeller(self): from dials.algorithms.background.simple import Linear3dModeller from dials.array_family import flex modeller = Linear3dModeller() # Generate shoeboxes ma = 100 mb = 1 mc = 2 md = 3 sboxes, masks = self.generate_background(self.size, 1000, ma, mb, mc, md) # Compute model a, b, c, d = [], [], [], [] va, vb, vc, vd = [], [], [], [] pa = [] pv = [] for i in range(1000): model = modeller.create(sboxes[i], masks[i]) assert len(model.params()) == 4 assert len(model.variances()) == 4 p = model.params() v = model.variances() pa.append(tuple(p)) pv.append(tuple(v)) a, b, c, d = zip(*pa) va, vb, vc, vd = zip(*pv) # Compute Z for each parameter za = (flex.double(a) - ma) / flex.sqrt(flex.double(va)) zb = (flex.double(b) - mb) / flex.sqrt(flex.double(vb)) zc = (flex.double(c) - mc) / flex.sqrt(flex.double(vc)) zd = (flex.double(d) - md) / flex.sqrt(flex.double(vd)) # Check it looks standard normal self.assert_std_norm(za) self.assert_std_norm(zb) self.assert_std_norm(zc) self.assert_std_norm(zd) print "OK"
def __init__(O, points, epsilon=1e-2):
    """Compute the Minimum-Volume Covering Ellipsoid of ``points`` using the
    Khachiyan Algorithm.

    Based on a Python implementation by Raj Rajashankar (ANL, Nov 2011),
    which in turn was based on a Matlab script by Nima Moshtagh (2009,
    http://stackoverflow.com/questions/1768197/bounding-ellipse/1768440#1768440).

    Args:
        points: non-empty array of 3D points (must provide ``size()`` and
            ``as_double()``).
        epsilon: iteration stops once the Euclidean norm of the change in
            the weight vector is no larger than this value.

    Sets ``O.center``, ``O.radii`` and ``O.rotation``.

    Caveats:
      - center and radii are correct, but rotation may permute axes
      - scales with the square of the number of points
    """
    d = 3  # d is the dimension
    n = points.size()
    assert n > 0
    #
    from scitbx.array_family import flex
    p = points.as_double()
    p.reshape(flex.grid(n, 3))
    p = p.matrix_transpose()
    q = p.deep_copy()
    q.resize(flex.grid(4, n), 1)
    #
    # Uniform starting weights. Written as 1.0/n so the value is a true
    # fraction even where "/" on two ints is Python 2 floor division
    # (1/n would then be 0 for every n > 1).
    u = flex.double(n, 1.0 / n)
    umx = flex.double(flex.grid(n, n), 0)
    #
    err = epsilon + 1
    while err > epsilon:
        umx.matrix_diagonal_set_in_place(u)
        x_inv = q.matrix_multiply(umx).matrix_multiply_transpose(
            q).matrix_inversion()
        m = q.matrix_transpose_multiply(
            x_inv).matrix_multiply(q).matrix_diagonal()
        # move weight towards the point with the largest diagonal entry
        j = flex.max_index(m)
        maximum = m[j]
        ascent_step_size = (maximum - d - 1) / ((d + 1) * (maximum - 1))
        new_u = (1 - ascent_step_size) * u
        new_u[j] += ascent_step_size
        err = flex.sum_sq(new_u - u) ** 0.5
        u = new_u
    #
    center = p.matrix_multiply(u)
    umx.matrix_diagonal_set_in_place(u)
    t1 = p.matrix_multiply(umx).matrix_multiply_transpose(p)
    t2 = center.matrix_outer_product(center)
    a = (t1 - t2).matrix_inversion() / d
    #
    import scitbx.linalg.svd
    svd = scitbx.linalg.svd.real(a, accumulate_u=False, accumulate_v=True)
    # radii are the inverse square roots of the singular values of ``a``
    size = 1.0 / flex.sqrt(svd.sigma)
    from scitbx import matrix
    O.center = matrix.col(center)
    O.radii = matrix.col(size)
    O.rotation = matrix.sqr(svd.v)
def compute_per_chain(h1, h2, log):
    """Print min/max/mean inter-atomic distances for each pair of chains.

    Nothing is printed when ``h1`` holds exactly one chain.
    """
    chains_1 = list(h1.chains())
    chains_2 = list(h2.chains())
    if len(chains_1) == 1:
        return
    print >> log, "Per chain (min/max/mean):"
    for chain_1, chain_2 in zip(chains_1, chains_2):
        xyz_1 = chain_1.atoms().extract_xyz()
        xyz_2 = chain_2.atoms().extract_xyz()
        stats = flex.sqrt((xyz_1 - xyz_2).dot()).min_max_mean().as_tuple()
        print >> log, chain_1.id, ": %-8.3f %-8.3f %-8.3f" % stats
    # blank separator line after the table
    print >> log
def show(self, curr_temp):
    """Log one progress line for temperature ``curr_temp``.

    Does nothing unless ``self.verbose`` is set. The line reports the mean
    distance moved from ``self.sites_cart_start``, the third element of the
    angle and bond deviation summaries and, when an fmodel is available,
    r_work and r_free.
    """
    if not self.verbose:
        return
    sites_cart = self.xray_structure.sites_cart()
    es = self.restraints_manager.geometry.energies_sites(sites_cart=sites_cart)
    bond_dev = es.bond_deviations()[2]
    angle_dev = es.angle_deviations()[2]
    dist = flex.mean(flex.sqrt((self.sites_cart_start - sites_cart).dot()))
    if self.fmodel is None:
        fmt = " temp=%7.1f dist_moved=%6.2f angles=%6.2f bonds=%6.3f"
        print >> self.log, fmt % (curr_temp, dist, angle_dev, bond_dev)
    else:
        fmt = " temp=%7.1f r_work=%6.4f r_free=%6.4f dist_moved=%6.2f angles=%6.2f bonds=%6.3f"
        print >> self.log, fmt % (
            curr_temp, self.fmodel.r_work(), self.fmodel.r_free(),
            dist, angle_dev, bond_dev)
def get_mean(self, scales, offsets):
    """Return the weighted mean of the scaled and offset data sets.

    With more than two sets, each set contributes ``scale * (mean + offset)``
    weighted by ``1 / (sqrt(variance) + 1e-8)``. Otherwise the reference
    mean ``self.means[self.ref]`` is returned unchanged.
    """
    if self.n_sets <= 2:
        return self.means[self.ref]
    # two independent zero arrays shaped like the reference mean
    weighted_sum = self.means[self.ref] * 0
    weight_total = self.means[self.ref] * 0
    for mean, variance, scale, offset in zip(
            self.means, self.vars, scales, offsets):
        scaled_mean = scale * (mean + offset)
        weight = 1 / (flex.sqrt(variance) + 1e-8)
        weight_total += weight
        weighted_sum = weighted_sum + scaled_mean * weight
    return weighted_sum / weight_total
def run(): pdb_inp1 = iotbx.pdb.input(source_info=None, lines=asu) ph1 = pdb_inp1.construct_hierarchy() sites_cart1 = ph1.atoms().extract_xyz() pdb_inp2 = iotbx.pdb.input(file_name='test_asu.pdb') ph2 = pdb_inp2.construct_hierarchy() sites_cart2 = ph2.atoms().extract_xyz() d = flex.sqrt((sites_cart1 - sites_cart2).dot()).as_double().min_max_mean().as_tuple() print 'sites_cart from string in file - sites_cart from same pdb, but from file' print d print '-'*50
def estimate_signal_to_noise(x, y_noisy, y_smoothed, plot=False):
    """Estimate noise in spectra by subtracting a smoothed spectrum from the
    original noisy unsmoothed spectrum.

    The squared residual is smoothed with a Savitzky-Golay filter to give a
    local variance; non-positive variance estimates are replaced by the mean
    variance before taking the ratio.

    Args:
        x: abscissa values (must provide ``as_double()``).
        y_noisy: the unsmoothed spectrum.
        y_smoothed: the smoothed spectrum.
        plot: if true, show diagnostic matplotlib plots.

    Returns:
        ``y_smoothed / sqrt(sigma_sq)``, the point-wise signal to noise.

    See:
      The extraction of signal to noise values in x-ray absorption spectroscopy
      A. J. Dent, P. C. Stephenson, and G. N. Greaves
      Rev. Sci. Instrum. 63, 856 (1992); http://dx.doi.org/10.1063/1.1142627
    """
    noise = y_noisy - y_smoothed
    noise_sq = flex.pow2(noise)
    # A sliding-average estimate used to be computed here as well, but its
    # result was overwritten by the filter below before being read, so only
    # the Savitzky-Golay estimate is evaluated.
    sigma_sq = smoothing.savitzky_golay_filter(
        x.as_double(), noise_sq, half_window=20, degree=1)[1]
    sigma_sq.set_selected(sigma_sq <= 0, flex.mean(sigma_sq))
    # or do this instead to use the background region as the source of noise:
    #signal_to_noise = y_smoothed/math.sqrt(flex.mean(noise_sq[50:190]))
    signal_to_noise = y_smoothed/flex.sqrt(sigma_sq)
    #signal_to_noise.set_selected(x < 50, 0)
    #signal_to_noise.set_selected(x > 375, 0)
    if plot:
        from matplotlib import pyplot
        linewidth = 2
        # figure 1: raw and smoothed spectra
        pyplot.plot(x, y_noisy, linewidth=linewidth)
        pyplot.plot(x, y_smoothed, linewidth=linewidth)
        pyplot_label_axes()
        pyplot.show()
        # figure 2: residual and estimated sigma
        pyplot.plot(x, noise, linewidth=linewidth, label="noise")
        pyplot.plot(x, flex.sqrt(sigma_sq), linewidth=linewidth, label="sigma")
        pyplot_label_axes()
        pyplot.legend(loc=2, prop={'size':20})
        pyplot.show()
        # figure 3: signal to noise
        pyplot.plot(x, signal_to_noise, linewidth=linewidth)
        pyplot_label_axes()
        pyplot.show()

    return signal_to_noise
def __init__(self, pdb_hierarchy, eps = 0.01, add_identity=True):
    """Derive superposition operators relating the first chain to the others.

    For every protein / nucleic-acid chain after the first, a least-squares
    fit is stored in both directions (first chain onto the other chain, and
    back). ``eps`` is not used in this method. When ``add_identity`` is true
    an identity rotation and zero translation are stored for the first chain.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm against the original layout.
    """
    self.pdb_hierarchy = pdb_hierarchy
    n_atoms_per_chain = flex.int()
    sites_cart_chain_0 = None
    self.rotation_matrices = []
    self.translation_vectors = []
    self.back_rotation_matrices = []
    self.back_translation_vectors = []
    self.ph_first_chain = None
    #
    # collect the atom count of every chain
    for i_chain, chain in enumerate(pdb_hierarchy.chains()):
        n_atoms_per_chain.append(chain.atoms_size())
    #
    outlier_found = False
    # operators are only derived when all chains have the same atom count
    if(n_atoms_per_chain.all_eq(n_atoms_per_chain[0])):
        for i_chain, chain in enumerate(pdb_hierarchy.chains()):
            if(chain.is_na() or chain.is_protein()):
                n_atoms_per_chain.append(chain.atoms_size())
                if(sites_cart_chain_0 is None and i_chain==0):
                    # the first chain is the reference copy
                    sites_cart_chain_0 = chain.atoms().extract_xyz()
                    # select the leading atoms of the hierarchy, as many as
                    # the first chain has sites
                    sel = flex.size_t(xrange(sites_cart_chain_0.size()))
                    self.ph_first_chain = pdb_hierarchy.select(sel)
                    if(add_identity):
                        um = scitbx.matrix.sqr((
                            1,0,0,
                            0,1,0,
                            0,0,1))
                        zv = scitbx.matrix.col((0, 0, 0))
                        self.rotation_matrices.append(um)
                        self.translation_vectors.append(zv)
                        self.back_rotation_matrices.append(um)
                        self.back_translation_vectors.append(zv)
                if(i_chain > 0):
                    # first copy onto others
                    lsq_fit_obj = superpose.least_squares_fit(
                        reference_sites = sites_cart_chain_0,
                        other_sites = chain.atoms().extract_xyz())
                    self.rotation_matrices.append(lsq_fit_obj.r.transpose())
                    self.translation_vectors.append(lsq_fit_obj.t)
                    # (min, max, mean) distance between the reference and the
                    # best-fit copy; a maximum above 2 flags an outlier
                    d = flex.sqrt((sites_cart_chain_0-
                        lsq_fit_obj.other_sites_best_fit()).dot()).min_max_mean().as_tuple()
                    if(d[1]>2):
                        outlier_found=True
                    # others onto first copy
                    lsq_fit_obj = superpose.least_squares_fit(
                        reference_sites = chain.atoms().extract_xyz(),
                        other_sites = sites_cart_chain_0)
                    self.back_rotation_matrices.append(lsq_fit_obj.r)
                    self.back_translation_vectors.append(lsq_fit_obj.t)
    # presumably self._init() resets the stored operators - TODO confirm
    if(outlier_found): self._init()
def run(pdb_str, expected_ids): get_class = iotbx.pdb.common_residue_names_get_class mon_lib_srv = mmtbx.monomer_library.server.server() rotamer_manager = RotamerEval() pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str) pdb_hierarchy = pdb_inp.construct_hierarchy() result_ids = [] for residue_group in pdb_hierarchy.residue_groups(): for conformer in residue_group.conformers(): for residue in conformer.residues(): sites_cart = residue.atoms().extract_xyz() rotamer_name = rotamer_manager.evaluate_residue(residue=residue) print residue.resname, residue.resseq, rotamer_name result_ids.append(rotamer_name) if(get_class(residue.resname) == "common_amino_acid"): rotamer_iterator = mon_lib_srv.rotamer_iterator( fine_sampling = True, comp_id = residue.resname, atom_names = residue.atoms().extract_name(), sites_cart = sites_cart) if(rotamer_iterator is None or rotamer_iterator.problem_message is not None or rotamer_iterator.rotamer_info is None): rotamer_iterator = None if(rotamer_iterator is not None): d1_min, d2_min = 1.e+9, 1.e+9 for r, rotamer_sites_cart in rotamer_iterator: sites_cart_rot = rotamer_manager.nearest_rotamer_sites_cart( residue=residue) d1= flex.mean(flex.sqrt((sites_cart - sites_cart_rot).dot())) d2= flex.mean(flex.sqrt((sites_cart - rotamer_sites_cart).dot())) if(d1 < d1_min): d1_min = d1 if(d2 < d2_min): d2_min = d2 assert approx_equal(d1_min, d2_min) assert result_ids == expected_ids
def exercise_1(): random.seed(0) flex.set_random_seed(0) pi = get_pdb_inputs(pdb_str=pdb_str_1) f_obs = abs(pi.xrs.structure_factors(d_min = 2.5).f_calc()) r_free_flags = f_obs.generate_r_free_flags(use_lattice_symmetry=False) if(0): pi.ph.adopt_xray_structure(pi.xrs) pi.ph.write_pdb_file(file_name="start.pdb", crystal_symmetry = pi.xrs.crystal_symmetry()) xrs_poor = shake_sites(xrs = pi.xrs.deep_copy_scatterers(), random=False, shift = 1.5, grm=pi.grm) if(0): pi.ph.adopt_xray_structure(xrs_poor) pi.ph.write_pdb_file(file_name="poor.pdb", crystal_symmetry = xrs_poor.crystal_symmetry()) fmodel = mmtbx.f_model.manager( f_obs = f_obs, r_free_flags = r_free_flags, xray_structure = xrs_poor) print "start r_work:", fmodel.r_work() # params = sa.master_params().extract() params.start_temperature=3000 params.final_temperature=0 params.cool_rate = 100 params.number_of_steps = 100 params.update_grads_shift = 0. # sa.run( params = params, fmodel = fmodel, restraints_manager = pi.grm, wx = 20, wc = 1, verbose = True) # r = fmodel.r_work() print "final r_work:", r assert r < 0.03, r dist = flex.mean(flex.sqrt((pi.xrs.sites_cart() - fmodel.xray_structure.sites_cart()).dot())) print "Distance(refined, answer): %6.4f"%dist assert dist < 0.25, dist if(0): pi.ph.adopt_xray_structure(fmodel.xray_structure) pi.ph.write_pdb_file(file_name="refined.pdb", crystal_symmetry = fmodel.xray_structure.crystal_symmetry())
def k_mask_grid_search(self, r_start):
    """Grid-search k_mask per bin, smooth it and update the scaling core.

    The per-bin k_mask values are found on a trial grid, smoothed and
    interpolated to individual reflections; k_isotropic is then re-derived
    and ``self.core`` / ``self.bss_result`` are updated. With more than two
    bins, a Gaussian fit of the per-bin mean k_isotropic is tried and kept
    only if it lowers the R-factor.

    Returns the R-factor (``self.r_factor()``) after all updates.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm against the original layout.
    """
    # trial k_mask values 0.00, 0.05, ..., 0.60
    k_mask_trial_range = flex.double([i/1000. for i in range(0,650,50)])
    k_mask = flex.double(self.f_obs.size(), 0)
    k_mask_bin = flex.double()
    for i_cas, cas in enumerate(self.cores_and_selections):
        selection, core, selection_use, sel_work = cas
        # total scale currently applied to the reflections in this selection
        scale = self.core.k_anisotropic.select(selection) * \
                self.core.k_isotropic.select(selection) * \
                self.core.k_isotropic_exp.select(selection)
        f_obs = self.f_obs.select(selection)
        k_mask_bin_, k_isotropic_bin_ = \
            bulk_solvent.k_mask_and_k_overall_grid_search(
                f_obs.data()/scale,
                core.f_calc.data(),
                core.f_mask().data(),
                k_mask_trial_range,
                selection_use)
        k_mask_bin.append(k_mask_bin_)
        k_mask.set_selected(selection, k_mask_bin_)
    k_mask_bin_smooth = self.smooth(k_mask_bin)
    # replaces the step-wise k_mask assembled in the loop above
    k_mask = self.populate_bin_to_individual_k_mask_linear_interpolation(
        k_mask_bin = k_mask_bin_smooth)
    k_isotropic = self._k_isotropic_as_scale_k1(r_start=r_start, k_mask=k_mask)
    self.core = self.core.update(k_masks = k_mask, k_isotropic = k_isotropic)
    self.bss_result.k_mask_bin_orig = k_mask_bin
    self.bss_result.k_mask_bin_smooth = k_mask_bin_smooth
    self.bss_result.k_mask = k_mask
    self.bss_result.k_isotropic = k_isotropic
    r_start = self.r_factor()
    ####
    if(len(self.cores_and_selections)>2):
        # per-bin (ss, mean k_isotropic) pairs for the Gaussian fit
        x=flex.double()
        y=flex.double()
        for i_sel, cas in enumerate(self.cores_and_selections):
            selection, core, selection_use, sel_work = cas
            sel = sel_work
            ss_ = self.ss_bin_values[i_sel][2]
            k_isotropic_ = flex.mean(self.core.k_isotropic.select(sel))
            x.append(ss_)
            y.append(k_isotropic_)
        import scitbx.math
        r = scitbx.math.gaussian_fit_1d_analytical(x = flex.sqrt(x), y = y)
        r_start = self.r_factor()
        k_isotropic = r.a*flex.exp(-self.ss*r.b)
        # note: ``r`` is re-bound here from the fit object to the trial R-factor
        r = self.try_scale(k_isotropic = k_isotropic)
        if(r<r_start):
            self.core = self.core.update(k_isotropic = k_isotropic)
    r_start = self.r_factor()
    return r_start
def test_center_of_coordinates_shift(self):
    """
    test shifting translation to and from the center of coordinates of the
    master ncs copy
    """
    # print sys._getframe().f_code.co_name
    xrs = self.pdb_inp.xray_structure_simple()
    nrg = self.ncs_restraints_group_list
    shifts = nu.get_ncs_groups_centers(
        xray_structure=xrs,
        ncs_restraints_group_list=nrg)

    xyz = self.pdb_inp.atoms().extract_xyz()
    # 1.0/size keeps this a true fraction even where "/" on two ints is
    # Python 2 floor division (1/size would then be 0).
    center_of_coor = (
        flex.vec3_double([xyz.sum()]) * (1.0 / xyz.size())).round(8)
    # test shifts
    t1 = shifts[0].round(8)
    t2 = shifts[1].round(8)
    d1 = flex.sqrt((center_of_coor - t1).dot()).min_max_mean().as_tuple()
    d2 = flex.sqrt((center_of_coor - t2).dot()).min_max_mean().as_tuple()
    assert (d1 == d2) and (d1 == (0, 0, 0))

    # test shift to center
    new_nrg = nu.shift_translation_to_center(
        shifts=shifts,
        ncs_restraints_group_list=nrg)
    expected = (-4.62169, -5.42257, 5.288)
    assert (new_nrg[0].copies[0].t.round(5)).elems == expected
    # back to original coordinates system
    old_nrg = nu.shift_translation_back_to_place(
        shifts=shifts,
        ncs_restraints_group_list=new_nrg)
    restored = (old_nrg[0].copies[0].t.round(5)).elems
    original = (nrg[0].copies[0].t.round(5)).elems
    assert original == restored
def generate_new_means_and_sigmas(self):
    """Update ``self.mean`` and ``self.sigma`` from the first ``self.n_in``
    samples in the order returned by ``generate_and_score_samples``, then
    re-evaluate the target at the new mean."""
    samples, _scores, order = self.generate_and_score_samples()
    sample_sum = self.mean * 0.0
    sample_sq_sum = self.mean * 0.0
    for rank in range(self.n_in):
        chosen = samples[order[rank]]
        sample_sum = sample_sum + chosen
        sample_sq_sum = sample_sq_sum + chosen * chosen
    new_mean = sample_sum / self.n_in
    # variance as <x^2> - <x>^2
    new_var = sample_sq_sum / self.n_in - new_mean * new_mean
    self.mean = self.mean * (1.0 - self.alpha) + self.alpha * new_mean
    beta = self.beta - self.beta * ((1.0 - 1.0 / self.count) ** self.q)
    self.sigma = flex.sqrt(
        self.sigma * self.sigma * (1.0 - beta) + beta * new_var)
    self.compute_target(self.mean)
def compute_per_residue(h1, h2, log):
    """Print min/max/mean inter-atomic distances for each pair of residue
    groups.

    Nothing is printed when ``h1`` holds exactly one residue group.
    """
    groups_1 = list(h1.residue_groups())
    groups_2 = list(h2.residue_groups())
    if len(groups_1) == 1:
        return
    print >> log, "Per residue (min/max/mean):"
    for group_1, group_2 in zip(groups_1, groups_2):
        # label: chain id / residue id / residue names joined by "_"
        label_parts = [
            group_1.parent().id.strip(),
            group_1.resid().strip(),
            "_".join(list(group_1.unique_resnames()))]
        label = "%10s" % "/".join(label_parts)
        xyz_1 = group_1.atoms().extract_xyz()
        xyz_2 = group_2.atoms().extract_xyz()
        stats = flex.sqrt((xyz_1 - xyz_2).dot()).min_max_mean().as_tuple()
        print >> log, label, ": %-8.3f %-8.3f %-8.3f" % stats
    # blank separator line after the table
    print >> log
def tst_linear2d_modeller(self): from dials.algorithms.background.simple import Linear2dModeller from dials.array_family import flex modeller = Linear2dModeller() # Generate shoeboxes ma = 100 mb = 1 mc = 2 sboxes, masks = self.generate_background(self.size, 1000, ma, mb, mc, 0) pa = [] pv = [] for i in range(1000): model = modeller.create(sboxes[i], masks[i]) assert len(model.params()) == 9 * 3 assert len(model.variances()) == 9 * 3 p = model.params() v = model.variances() for j in range(9): pa.append(tuple(p[3 * j : 3 * (j + 1)])) pv.append(tuple(v[3 * j : 3 * (j + 1)])) a, b, c = zip(*pa) va, vb, vc = zip(*pv) # Compute Z for each parameter za = (flex.double(a) - ma) / flex.sqrt(flex.double(va)) zb = (flex.double(b) - mb) / flex.sqrt(flex.double(vb)) zc = (flex.double(c) - mc) / flex.sqrt(flex.double(vc)) # Check it looks standard normal self.assert_std_norm(za) self.assert_std_norm(zb) self.assert_std_norm(zc) print "OK"
def compute_functional_and_gradients(self):
    """Return ``(f, gradients)`` for the current parameters ``self.x``.

    ``f`` is the sum over data points of ``sqrt(difference**2)`` where
    ``difference`` is predicted CDF minus ``self.y_data``; ``gradients`` is
    ``2 * sum_i difference[i] * d(cdf_i)/d(parameters)``.

    NOTE(review): as written, ``f`` sums absolute differences while
    ``gradients`` is the derivative of the sum of squared differences -
    confirm which target is intended.
    """
    # calculate difference between predicted and observed values
    self.distribution.set_parameters(p=self.x)
    predicted = flex.double(self.n)
    for i in xrange(self.n):
        predicted[i] = self.distribution.cdf(x=self.x_data[i])
    difference = predicted - self.y_data

    # target function for minimization
    f = flex.sum(flex.sqrt(difference * difference))

    n_params = len(self.x)
    gradients = flex.double(n_params)
    for i in xrange(self.n):
        cdf_gradients = self.distribution.cdf_gradients(x=self.x_data[i])
        residual = difference[i]
        for j in xrange(n_params):
            gradients[j] = gradients[j] + residual * cdf_gradients[j]
    gradients = 2.0 * gradients
    return f, gradients