def run(self):
    """Exercise dials' simple_fill hole-filling on a synthetic gradient image.

    A circular hole is punched out of a smooth ramp; after filling, every
    reconstructed value must lie within the [min, max] range of the known
    (unmasked) pixels.
    """
    from dials.algorithms.image.fill_holes import simple_fill
    from scitbx.array_family import flex
    from math import sqrt

    mask = flex.bool(flex.grid(100, 100), True)
    # Fill with 0.0 (original passed True, which only worked by bool->float
    # coercion); every element is overwritten in the loop below anyway.
    data = flex.double(flex.grid(100, 100), 0)
    for j in range(100):
        for i in range(100):
            data[j, i] = 10 + j * 0.01 + i * 0.01
            # Punch a circular hole (radius ~10.5 px) centred at (50, 50).
            if sqrt((j - 50)**2 + (i - 50)**2) <= 10.5:
                mask[j, i] = False
                data[j, i] = 0
    result = simple_fill(data, mask)
    known = data.as_1d().select(mask.as_1d())
    # Idiomatic boolean inversion instead of comparing against False.
    filled = result.as_1d().select(~mask.as_1d())
    # Interpolated values must stay within the range of the known values.
    assert flex.max(filled) <= flex.max(known)
    assert flex.min(filled) >= flex.min(known)
    # Test passed
    print('OK')
def test_cma_es_rosebrock_n(M=10):
    """Minimize a 2M-dimensional Rosenbrock function with CMA-ES.

    The search vector is split into two halves x (first M) and y (last M);
    each (x_i, y_i) pair contributes one Rosenbrock term.
    """
    def funct(x,y):
        # Sum of coupled Rosenbrock terms over paired coordinates.
        result = 0
        for xx,yy in zip(x,y):
            result+=100.0*((yy-xx*xx)**2.0) + (1-xx)**2.0
        return result
    N=M*2
    x = flex.double(N,10.0)   # starting point, far from the optimum at 1
    sd = flex.double(N,3.0)   # initial per-coordinate step sizes
    m = cma_es(N,x,sd)
    while ( not m.converged() ):
        # sample population
        p = m.sample_population()
        pop_size = p.accessor().all()[0]
        # update objective function
        v = flex.double(pop_size)
        for ii in range(pop_size):
            # Each row of p is one candidate vector of length N.
            vector = p[(ii*N):(ii*N + N)]
            x = vector[0:M]
            y = vector[M:]
            v[ii] = funct(x,y)
        m.update_distribution(v)
    print list(m.get_result())
    print flex.min(v)
    print
    x_final = m.get_result()
    print list(x_final)
def exercise_reference_impl_long(n_dynamics_steps, out):
    """Run a long dynamics trajectory and report energy-conservation stats.

    Prints periodic energy snapshots and the overall drift (max - min of the
    total energy) so conservation can be eyeballed from the log.
    """
    sim = fmri.simulation()
    e_tots = flex.double([sim.e_tot])
    print >> out, "i_step, [e_pot, e_kin_ang, e_kin_lin, e_kin, e_tot]"
    def show(i_step):
        # One-line energy snapshot for the current step.
        print >> out, i_step, [sim.e_pot, sim.e_kin_ang, sim.e_kin_lin, sim.e_kin, sim.e_tot]
        out.flush()
    # Show roughly 10 intermediate snapshots regardless of step count.
    n_show = max(1, n_dynamics_steps // 10)
    for i_step in xrange(n_dynamics_steps):
        sim.dynamics_step(delta_t=0.001)
        e_tots.append(sim.e_tot)
        if i_step % n_show == 0:
            show(i_step)
    show(n_dynamics_steps)
    print >> out
    print >> out, "number of dynamics steps:", n_dynamics_steps
    print >> out, "e_tot start:", e_tots[0]
    print >> out, " final:", e_tots[-1]
    print >> out, " min:", flex.min(e_tots)
    print >> out, " max:", flex.max(e_tots)
    # max-min is the total-energy drift over the run.
    print >> out, " max-min:", flex.max(e_tots) - flex.min(e_tots)
    print >> out
    out.flush()
def remove_common_isotropic_adp(self):
    """Subtract the common (minimum) isotropic B from all atoms.

    Stores the removed value in self.b_overall and verifies the new minimum
    B is ~0 afterwards.
    """
    xrs = self.xray_structure
    # Smallest equivalent isotropic B over all atoms.
    b_iso_min = flex.min(xrs.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1))
    self.b_overall = b_iso_min
    # NOTE(review): the message says "Max B" but the value subtracted is the
    # minimum B — the wording looks misleading; confirm intent before changing.
    print >> self.log, "Max B subtracted from atoms and used to sharpen map:", b_iso_min
    xrs.shift_us(b_shift=-b_iso_min)
    # After the shift the minimum B must be (numerically) zero.
    b_iso_min = flex.min(xrs.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1))
    assert approx_equal(b_iso_min, 0, 1.e-3)
def run(args):
    """Command-line driver: load one experiment + one indexed reflection
    table, drop unindexed reflections, report hkl ranges, then run the
    P1 indexing and point-group symmetry tests."""
    import libtbx.load_env
    from dials.array_family import flex
    from dials.util import log
    from dials.util.version import dials_version
    usage = "%s [options] experiment.json indexed.pickle" % \
        libtbx.env.dispatcher_name
    parser = OptionParser(
        usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_experiments=True,
        check_format=False,
        epilog=help_message)
    params, options = parser.parse_args(show_diff_phil=True)
    # Configure the logging
    log.config(info=params.output.log, debug=params.output.debug_log)
    logger.info(dials_version())
    reflections = flatten_reflections(params.input.reflections)
    experiments = flatten_experiments(params.input.experiments)
    if len(reflections) == 0 or len(experiments) == 0:
        parser.print_help()
        return
    # Exactly one experiment and one reflection table are expected.
    assert(len(reflections) == 1)
    assert(len(experiments) == 1)
    experiment = experiments[0]
    reflections = reflections[0]
    # remove reflections with 0, 0, 0 index
    zero = (reflections['miller_index'] == (0, 0, 0))
    logger.info('Removing %d unindexed reflections' % zero.count(True))
    reflections = reflections.select(~zero)
    h, k, l = reflections['miller_index'].as_vec3_double().parts()
    h = h.iround()
    k = k.iround()
    l = l.iround()
    logger.info('Range on h: %d to %d' % (flex.min(h), flex.max(h)))
    logger.info('Range on k: %d to %d' % (flex.min(k), flex.max(k)))
    logger.info('Range on l: %d to %d' % (flex.min(l), flex.max(l)))
    test_P1_crystal_indexing(reflections, experiment, params)
    test_crystal_pointgroup_symmetry(reflections, experiment, params)
def tst_curve_interpolator():
    """Exercise curve_interpolator on y = x**2 sampled on [0, 1).

    Checks that interpolated values match the analytic curve and that the
    reported user/target index ranges are correct, both for a full-range
    and a truncated-range input.
    """
    x = flex.double( range(25) )/24.0
    y = x*x
    # Interpolator target grid: 200 points spanning [0, 2.0].
    ip = curve_interpolator(0,2.0,200)
    x_target = ip.target_x
    y_ref = x_target*x_target
    nx,ny,a,b = ip.interpolate(x,y)
    # Only target points within the input range should be returned.
    count = 0
    for xx in x_target:
        if flex.max(x) >= xx:
            count += 1
    assert count==len(nx)
    for yy,yyy in zip(ny,y_ref):
        assert approx_equal(yy,yyy,eps=1e-3)
    # a: (start, end) indices into the user array; b: into the target grid.
    assert a[0]==0
    assert a[1]==24
    assert b[0]==0
    assert b[1] in (99,100)
    # Repeat with an input that does not start at zero.
    x = flex.double( range(5,23) )/24.0
    y = x*x
    ip = curve_interpolator(0,2.0,200)
    nx,ny,a,b = ip.interpolate(x,y)
    assert nx[0] >= flex.min(x)
    assert nx[-1] <= flex.max(x)
    y_ref= nx*nx
    for yy,yyy in zip(ny,y_ref):
        assert approx_equal(yy,yyy,eps=1e-3)
def inject(self, c=3.0):
    """Re-widen the search distribution when sigma has collapsed.

    If the smallest sigma component falls below self.inject_eps, every
    component is bumped by c times the larger of the recent mean shift and
    inject_eps. The previous mean/sigma snapshots are refreshed either way.
    """
    mean_shift = flex.max(self.mean - self.last_mean)
    smallest_sigma = flex.min(self.sigma)
    if smallest_sigma < self.inject_eps:
        bump = max(mean_shift * c, c * self.inject_eps)
        self.sigma = self.sigma + bump
    self.last_mean = self.mean.deep_copy()
    self.last_sigma = self.sigma.deep_copy()
def box_iterator(self):
    """Compute omit maps box-by-box over the ASU and paste results back.

    Each box is widened by a neutral cushion before the omit calculation,
    but only the original (narrow) box region is copied into the result map.
    """
    b = maptbx.boxes(
        n_real = self.atom_map_asu.focus(),
        fraction = self.box_size_as_fraction,
        max_boxes= self.max_boxes,
        log = self.log)
    def get_wide_box(s,e):
        # define wide box: neutral + phased volumes
        if(self.neutral_volume_box_cushion_width>0):
            sh = self.neutral_volume_box_cushion_width
            # Expand by the cushion, clamped to the ASU grid bounds.
            ss = [max(s[i]-sh,0) for i in [0,1,2]]
            ee = [min(e[i]+sh,n_real_asu[i]) for i in [0,1,2]]
        else:
            ss,ee = s,e
        return ss,ee
    n_real_asu = b.n_real
    n_boxes = len(b.starts)
    i_box = 0
    for s,e in zip(b.starts, b.ends):
        i_box+=1
        sw,ew = get_wide_box(s=s,e=e)
        fmodel_omit = self.omit_box(start=sw, end=ew)
        r = fmodel_omit.r_work()
        self.r.append(r) # for tests only
        if(self.log):
            print >> self.log, "r(curr,min,max,mean)=%6.4f %6.4f %6.4f %6.4f"%(r,
                flex.min(self.r), flex.max(self.r), flex.mean(self.r)), i_box, n_boxes
        omit_map_data = self.asu_map_from_fmodel(
            fmodel=fmodel_omit, map_type=self.map_type)
        # Copy only the narrow box region (s..e), not the cushioned one.
        maptbx.copy_box(
            map_data_from = omit_map_data,
            map_data_to = self.map_result_asu,
            start = s,
            end = e)
    self.map_result_asu.reshape(self.acc_asu)
def prepare_maps(fofc, two_fofc, fem, fofc_cutoff=2, two_fofc_cutoff=0.5,
                 fem_cutoff=0.5, connectivity_cutoff=0.5, local_average=True):
    """
    - This takes 3 maps: mFo-DFc, 2mFo-DFc and FEM and combines them into
      one map that is most suitable for real-space refinement.
    - Maps are the boxes extracted around region of interest from the whole
      unit cell map.
    - All maps are expected to be normalized by standard deviation
      (sigma-scaled) BEFORE extracting the box. There is no way to assert it
      at this point.
    - Map gridding equivalence is asserted.
    """
    m1,m2,m3 = fofc, two_fofc, fem
    # assert identical gridding
    for m_ in [m1,m2,m3]:
        for m__ in [m1,m2,m3]:
            assert m_.all() == m__.all()
            assert m_.focus() == m__.focus()
            assert m_.origin() == m__.origin()
    # binarize residual map: 1 where mFo-DFc exceeds the cutoff, else 0.
    sel = m1 <= fofc_cutoff
    mask = m1 .set_selected( sel, 0)
    mask = mask.set_selected(~sel, 1)
    del sel, m1
    assert approx_equal([flex.max(mask), flex.min(mask)], [1,0])
    def truncate_and_filter(m, cutoff, mask):
        # Zero out values below the cutoff, then apply the binary mask.
        return m.set_selected(m<=cutoff, 0)*mask
    # truncate and filter 2mFo-DFc map
    m2 = truncate_and_filter(m2, two_fofc_cutoff, mask)
    # truncate and filter FEM
    m3 = truncate_and_filter(m3, fem_cutoff, mask)
    del mask
    # combined maps
    def scale(m):
        # Re-sigma-scale; guard against an all-zero map.
        sd = m.sample_standard_deviation()
        if(sd != 0):
            return m/sd
        else:
            return m
    m2 = scale(m2)
    m3 = scale(m3)
    m = (m2+m3)/2.
    del m2, m3
    m = scale(m)
    # connectivity analysis: keep only the largest connected region (index 0
    # is the background, so it is skipped).
    co = maptbx.connectivity(map_data=m, threshold=connectivity_cutoff)
    v_max=-1.e+9
    i_max=None
    for i, v in enumerate(co.regions()):
        if(i>0):
            if(v>v_max):
                v_max=v
                i_max=i
    mask2 = co.result()
    selection = mask2==i_max
    mask2 = mask2.set_selected(selection, 1)
    mask2 = mask2.set_selected(~selection, 0)
    assert mask2.count(1) == v_max
    # final filter
    m = m * mask2.as_double()
    if(local_average):
        maptbx.map_box_average(map_data=m, cutoff=0.5, index_span=1)
    return m
def exercise_sim(out, n_dynamics_steps, delta_t, sim):
    """Run dynamics on sim, report energy statistics, and return the
    relative spread of the total energy (drift / average).

    Also dumps energy traces to a tmpNN.xy file when writing to stdout.
    """
    sim.check_d_pot_d_q()
    e_pots = flex.double([sim.e_pot])
    e_kins = flex.double([sim.e_kin])
    for i_step in xrange(n_dynamics_steps):
        sim.dynamics_step(delta_t=delta_t)
        e_pots.append(sim.e_pot)
        e_kins.append(sim.e_kin)
    e_tots = e_pots + e_kins
    sim.check_d_pot_d_q()
    print >> out, "energy samples:", e_tots.size()
    print >> out, "e_pot min, max:", min(e_pots), max(e_pots)
    print >> out, "e_kin min, max:", min(e_kins), max(e_kins)
    print >> out, "e_tot min, max:", min(e_tots), max(e_tots)
    print >> out, "start e_tot:", e_tots[0]
    print >> out, "final e_tot:", e_tots[-1]
    ave = flex.sum(e_tots) / e_tots.size()
    # Renamed from 'range' to avoid shadowing the builtin (and to match
    # run_simulation, which already uses 'range_').
    range_ = flex.max(e_tots) - flex.min(e_tots)
    if (ave == 0):
        relative_range = 0
    else:
        relative_range = range_ / ave
    print >> out, "ave:", ave
    print >> out, "range:", range_
    print >> out, "relative range:", relative_range
    print >> out
    out.flush()
    if (out is sys.stdout):
        # Dump xy traces for external plotting; "&" separates data sets.
        f = open("tmp%02d.xy" % plot_number[0], "w")
        for es in [e_pots, e_kins, e_tots]:
            for e in es:
                print >> f, e
            print >> f, "&"
        f.close()
        plot_number[0] += 1
    return relative_range
def exercise_tardy_model(out, n_dynamics_steps, delta_t, tardy_model):
    """Run dynamics on a tardy model, report energy statistics, and return
    the relative spread of the total energy (drift / average)."""
    tardy_model.check_d_e_pot_d_q()
    e_pots = flex.double([tardy_model.e_pot()])
    e_kins = flex.double([tardy_model.e_kin()])
    for i_step in xrange(n_dynamics_steps):
        tardy_model.dynamics_step(delta_t=delta_t)
        e_pots.append(tardy_model.e_pot())
        e_kins.append(tardy_model.e_kin())
    e_tots = e_pots + e_kins
    tardy_model.check_d_e_pot_d_q()
    print >> out, "degrees of freedom:", tardy_model.degrees_of_freedom
    print >> out, "energy samples:", e_tots.size()
    print >> out, "e_pot min, max:", min(e_pots), max(e_pots)
    print >> out, "e_kin min, max:", min(e_kins), max(e_kins)
    print >> out, "e_tot min, max:", min(e_tots), max(e_tots)
    print >> out, "start e_tot:", e_tots[0]
    print >> out, "final e_tot:", e_tots[-1]
    ave = flex.sum(e_tots) / e_tots.size()
    # Renamed from 'range' to avoid shadowing the builtin (and to match
    # run_simulation, which already uses 'range_').
    range_ = flex.max(e_tots) - flex.min(e_tots)
    if (ave == 0):
        relative_range = 0
    else:
        relative_range = range_ / ave
    print >> out, "ave:", ave
    print >> out, "range:", range_
    print >> out, "relative range:", relative_range
    print >> out
    out.flush()
    return relative_range
def __init__(self,rawdata,projection_vector,spotfinder_spot,verbose=False):
    """Build a 1-D intensity profile of a spot projected onto a line.

    Each body pixel of the spot is projected onto projection_vector
    (relative to the spot's centre of mass) and its raw intensity is
    spread over a 0.1-pixel histogram via the detector point response.
    """
    # projection vector is either the radial or azimuthal unit vector
    # at a specific Bragg spot position
    model_center = col((spotfinder_spot.ctr_mass_x(),spotfinder_spot.ctr_mass_y()))
    # px_x/px_y: sampled 1-D point-response profile along the line.
    px_x,px_y = project_2d_response_onto_line(projection_vector)
    point_projections = flex.double()
    pixel_values = flex.double()
    for point in spotfinder_spot.bodypixels:
        # Signed distance of this pixel from the centre along the line.
        point_projection = (col((point.x,point.y)) - model_center).dot( projection_vector )
        point_projections.append(point_projection)
        pxval = rawdata[(point.x,point.y)]
        if verbose:
            print "point_projection",point_projection,
            print "signal",pxval
        pixel_values.append( pxval )
    Lmin = flex.min(point_projections)
    Lmax = flex.max(point_projections)
    #print "Range %6.2f"%(Lmax-Lmin)
    # Pad the histogram range by 2 pixels on each side.
    Rmin = round(Lmin-2.0,1)
    Rmax = round(Lmax+2.0,1)
    #print "Range %6.2f"%(Rmax-Rmin)
    def histogram_bin (j) :
        # bin units of 1/10 pixel (NOTE: defined but not used below; the
        # binning expression is inlined in the accumulation loop).
        return int(10.*(j-Rmin))
    histo_x = flex.double((int(10*(Rmax-Rmin))))
    histo_y = flex.double(len(histo_x))
    for ihis in xrange(len(histo_x)):
        histo_x[ihis] = Rmin + 0.1*ihis
    # Smear each pixel's value over the response profile into the histogram.
    for ipp, point_projection in enumerate(point_projections):
        value = pixel_values[ipp]
        for isample in xrange(len(px_x)):
            histo_y[int(10*(point_projection + px_x[isample] - Rmin))] += value * px_y[isample]
    self.histo_x = histo_x
    self.histo_y = histo_y
def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
    """Detect blank rotation ranges from integrated intensities.

    Bins reflections by image number (bin width ~ phi_step), computes the
    mean I/sigI per bin, and flags bins whose mean falls below
    fractional_loss times the maximum as potentially blank. Returns a
    plotly-style dict including the detected blank regions.
    """
    # Prefer profile-fitted intensities; fall back to summation.
    prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
    if prf_sel.count(True) > 0:
        reflections = reflections.select(prf_sel)
        intensities = reflections["intensity.prf.value"]
        variances = reflections["intensity.prf.variance"]
    else:
        sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
        reflections = reflections.select(sum_sel)
        intensities = reflections["intensity.sum.value"]
        variances = reflections["intensity.sum.variance"]
    i_sigi = intensities / flex.sqrt(variances)
    xyz_px = reflections["xyzobs.px.value"]
    x_px, y_px, z_px = xyz_px.parts()
    phi = scan.get_angle_from_array_index(z_px)
    osc = scan.get_oscillation()[1]
    # Round the requested step up to a whole number of images.
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc
    phi_min = flex.min(phi)
    phi_max = flex.max(phi)
    n_steps = iceil((phi_max - phi_min) / phi_step)
    hist = flex.histogram(z_px, n_slots=n_steps)
    mean_i_sigi = flex.double()
    for i, slot_info in enumerate(hist.slot_infos()):
        sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
        if sel.count(True) == 0:
            mean_i_sigi.append(0)
        else:
            mean_i_sigi.append(flex.mean(i_sigi.select(sel)))
    # NOTE(review): fractional_mean_i_sigi is computed but never used.
    fractional_mean_i_sigi = mean_i_sigi / flex.max(mean_i_sigi)
    potential_blank_sel = mean_i_sigi <= (fractional_loss * flex.max(mean_i_sigi))
    xmin, xmax = zip(*[(slot_info.low_cutoff, slot_info.high_cutoff)
                       for slot_info in hist.slot_infos()])
    # NOTE(review): the y values are mean I/sigI, but the y-axis title says
    # "Number of reflections" — looks inconsistent; confirm before changing.
    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(mean_i_sigi),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {"xaxis": {"title": "z observed (images)"},
                   "yaxis": {"title": "Number of reflections"},
                   "bargap": 0},
    }
    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions
    return d
def test1():
    """Regression test for dials.rs_mapper: run it on the centroid test data
    and check the resulting CCP4 map's size and header statistics."""
    dials_regression = libtbx.env.find_in_repositories(
        relative_path="dials_regression", test=os.path.isdir)
    data_dir = os.path.join(dials_regression, "centroid_test_data")
    datablock_path = os.path.join(data_dir, "datablock.json")
    # work in a temporary directory
    cwd = os.path.abspath(os.curdir)
    tmp_dir = open_tmp_directory(suffix="tst_rs_mapper")
    os.chdir(tmp_dir)
    cmd = 'dials.rs_mapper ' + datablock_path + ' map_file="junk.ccp4"'
    result = easy_run.fully_buffered(command=cmd).raise_if_errors()
    # load results
    from iotbx import ccp4_map
    from scitbx.array_family import flex
    m = ccp4_map.map_reader(file_name="junk.ccp4")
    assert len(m.data) == 7189057
    # Header statistics must agree with those recomputed from the data.
    assert approx_equal(m.header_min, -1.0)
    assert approx_equal(flex.min(m.data), -1.0)
    assert approx_equal(m.header_max, 2052.75)
    assert approx_equal(flex.max(m.data), 2052.75)
    assert approx_equal(m.header_mean, 0.018606403842568398)
    assert approx_equal(flex.mean(m.data), 0.018606403842568398)
    print "OK"
    return
def apply_default_filter(database_dict, d_min, max_models_for_default_filter,
                         key = "high_resolution"):
    """Select database entries whose resolution is closest to d_min.

    Picks a symmetric window (up to max_models_for_default_filter entries)
    around the entry nearest to d_min in the resolution-ordered database,
    widening one-sidedly to ~100 entries when d_min sits at either edge.
    """
    database_dict = order_by_value(database_dict = database_dict, key = key)
    values = flex.double()
    for v in database_dict[key]: values.append(float(v))
    # Index of the entry whose resolution is closest to d_min (middle one
    # if there are ties).
    diff = flex.abs(values-d_min)
    min_val = flex.min(diff)
    i_min_sel = (diff == min_val).iselection()
    assert i_min_sel.size() > 0
    i_min = i_min_sel[i_min_sel.size()//2]
    i_l = max(0, i_min-max_models_for_default_filter//2)
    i_r = min(values.size()-1, i_min+max_models_for_default_filter//2)
    #
    print "apply_default_filter:"
    print " found data points dmin->higher =", abs(i_l-i_min)
    print " found data points dmin->lower =", abs(i_r-i_min)
    # Symmetrize the window around i_min.
    imm = min(abs(i_l-i_min), abs(i_r-i_min))
    i_l, i_r = i_min-imm, i_min+imm
    if (imm == 0) :
        # d_min is at one edge of the database: take ~100 entries one-sided.
        if (i_l == 0) :
            i_r = 100
            print " used data points dmin->higher =", 0
            print " used data points dmin->lower =", i_r
        elif (i_l == i_r == len(values) - 1) :
            i_l -= 100
            print " used data points dmin->higher =", i_l
            print " used data points dmin->lower =", 0
    else :
        print " used data points dmin->higher =", imm
        print " used data points dmin->lower =", imm
    #
    selection = flex.bool(values.size(), False)
    # NOTE(review): xrange(i_l, i_r) excludes index i_r, so the right edge
    # of the window is not selected — confirm whether i_r+1 was intended.
    for i in xrange(i_l,i_r):
        selection[i] = True
    return select_dict(database_dict = database_dict, selection = selection)
def show(self):
    """Print per-resolution-bin bulk-solvent/scaling statistics:
    k_mask (original, smoothed, averaged), k_iso, k_aniso, mean |Fobs|
    and the R factor for each bin."""
    b = self.bss_result
    print >> self.log, " Statistics in resolution bins:"
    #assert k_mask.size() == len(self.bin_selections)
    fmt=" %7.5f %6.2f -%6.2f %5.1f %5d %-6s %-6s %-6s %6.3f %6.3f %8.2f %6.4f"
    f_model = self.core.f_model.data()
    print >> self.log, " s^2 Resolution Compl Nrefl k_mask k_iso k_ani <Fobs> R"
    print >> self.log, " (A) (%) orig smooth average"
    # Defaults shown when the bulk-solvent result lacks per-bin k_mask values.
    k_mask_bin_orig_ = str(None)
    k_mask_bin_smooth_ = str(None)
    # NOTE(review): k_mask_bin_approx_ is assigned but never used below.
    k_mask_bin_approx_ = str(None)
    for i_sel, cas in enumerate(self.cores_and_selections):
        selection, core, selection_use, sel_work = cas
        sel = sel_work
        ss_ = self.ss_bin_values[i_sel][2]
        if(b is not None and self.bss_result.k_mask_bin_orig is not None):
            k_mask_bin_orig_ = "%6.4f"%self.bss_result.k_mask_bin_orig[i_sel]
        if(b is not None and self.bss_result.k_mask_bin_smooth is not None):
            k_mask_bin_smooth_ = "%6.4f"%self.bss_result.k_mask_bin_smooth[i_sel]
        k_mask_bin_averaged_ = "%6.4f"%flex.mean(self.core.k_mask().select(sel))
        d_ = self.d_spacings.data().select(sel)
        d_min_ = flex.min(d_)
        d_max_ = flex.max(d_)
        n_ref_ = d_.size()
        f_obs_ = self.f_obs.select(sel)
        f_obs_mean_ = flex.mean(f_obs_.data())
        k_isotropic_ = flex.mean(self.core.k_isotropic.select(sel))
        k_anisotropic_ = flex.mean(self.core.k_anisotropic.select(sel))
        cmpl_ = f_obs_.completeness(d_max=d_max_)*100.
        r_ = bulk_solvent.r_factor(f_obs_.data(),f_model.select(sel),1)
        print >> self.log, fmt%(ss_, d_max_, d_min_, cmpl_, n_ref_,
            k_mask_bin_orig_, k_mask_bin_smooth_,k_mask_bin_averaged_,
            k_isotropic_, k_anisotropic_, f_obs_mean_, r_)
def apply_back_trace_of_overall_exp_scale_matrix(self, xray_structure=None):
    """Fold the overall exponential (isotropic k*exp(-B*s^2)) scale back
    into the atomic B-factors.

    Fits k,b to the total scale; when the fit is good (r < 0.7) the B shift
    is applied to the xray_structure and compensated in the stored scales.
    Returns a group_args with the updated structure and scale arrays, or
    None when no xray_structure is given.
    """
    k,b=self.overall_isotropic_kb_estimate()
    k_total = self.core.k_isotropic * self.core.k_anisotropic * \
        self.core.k_isotropic_exp
    k,b,r = mmtbx.bulk_solvent.fit_k_exp_b_to_k_total(k_total, self.ss, k, b)
    # Only accept the fit when the residual is reasonably small.
    if(r<0.7): self.k_exp_overall,self.b_exp_overall = k,b
    if(xray_structure is None): return None
    b_adj = 0
    if([self.k_exp_overall,self.b_exp_overall].count(None)==0 and k != 0):
        bs1 = xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
        def split(b_trace, xray_structure):
            # Split b_trace into an applied shift (b_adj) and a residual
            # that would make some atomic B negative (b_res).
            b_min = xray_structure.min_u_cart_eigenvalue()*adptbx.u_as_b(1.)
            b_res = min(0, b_min + b_trace+1.e-6)
            b_adj = b_trace-b_res
            xray_structure.shift_us(b_shift = b_adj)
            return b_adj, b_res
        b_adj,b_res=split(b_trace=self.b_exp_overall,xray_structure=xray_structure)
        k_new = self.k_exp_overall*flex.exp(-self.ss*b_adj)
        # Sanity check: the shift must be uniform and equal to b_adj.
        bs2 = xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
        diff = bs2-bs1
        assert approx_equal(flex.min(diff), flex.max(diff))
        assert approx_equal(flex.max(diff), b_adj)
        # Compensate the applied B shift in the stored scale arrays.
        self.core = self.core.update(
            k_isotropic = self.core.k_isotropic,
            k_isotropic_exp = self.core.k_isotropic_exp/k_new,
            k_masks = [m*flex.exp(-self.ss*b_adj) for m in self.core.k_masks])
    return group_args(
        xray_structure = xray_structure,
        k_isotropic = self.k_isotropic(),
        k_anisotropic = self.k_anisotropic(),
        k_mask = self.k_masks(),
        b_adj = b_adj)
def __init__(self, xray_structure, k_anisotropic, k_masks, ss):
    """Fold an overall exponential scale out of k_anisotropic and into the
    atomic B-factors (same scheme as
    apply_back_trace_of_overall_exp_scale_matrix, but driven from the
    anisotropic scale alone)."""
    self.xray_structure = xray_structure
    self.k_anisotropic = k_anisotropic
    self.k_masks = k_masks
    self.ss = ss
    #
    k_total = self.k_anisotropic
    # Analytical Gaussian fit as the starting guess for k*exp(-b*s^2).
    r = scitbx.math.gaussian_fit_1d_analytical(x=flex.sqrt(self.ss), y=k_total)
    k,b = r.a, r.b
    #
    k,b,r = mmtbx.bulk_solvent.fit_k_exp_b_to_k_total(k_total, self.ss, k, b)
    k_exp_overall, b_exp_overall = None,None
    # Only accept the fit when the residual is reasonably small.
    if(r<0.7): k_exp_overall, b_exp_overall = k,b
    if(self.xray_structure is None): return None
    b_adj = 0
    if([k_exp_overall, b_exp_overall].count(None)==0 and k != 0):
        bs1 = self.xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
        def split(b_trace, xray_structure):
            # Split b_trace into an applied shift (b_adj) and a residual
            # that would make some atomic B negative (b_res).
            b_min = xray_structure.min_u_cart_eigenvalue()*adptbx.u_as_b(1.)
            b_res = min(0, b_min + b_trace+1.e-6)
            b_adj = b_trace-b_res
            xray_structure.shift_us(b_shift = b_adj)
            return b_adj, b_res
        b_adj,b_res=split(b_trace=b_exp_overall,xray_structure=self.xray_structure)
        k_new = k_exp_overall*flex.exp(-self.ss*b_adj)
        # Sanity check: the shift must be uniform and equal to b_adj.
        bs2 = self.xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
        diff = bs2-bs1
        assert approx_equal(flex.min(diff), flex.max(diff))
        assert approx_equal(flex.max(diff), b_adj)
        # Compensate the applied B shift in the stored scale arrays.
        self.k_anisotropic = self.k_anisotropic/k_new
        self.k_masks = [m*flex.exp(-self.ss*b_adj) for m in self.k_masks]
def get_summary (self) :
    """
    Returns a simple object for harvesting statistics elsewhere.
    """
    n_anom_peaks = None
    if (self.anom_peaks is not None) :
        n_anom_peaks = len(self.anom_peaks.heights)
    n_water_peaks = n_water_anom_peaks = None
    if (self.water_peaks is not None) :
        n_water_peaks = len(self.water_peaks)
    if (self.water_anom_peaks is not None) :
        n_water_anom_peaks = len(self.water_anom_peaks)
    hole_max = peak_max = None
    if (len(self.peaks.heights) > 0) :
        peak_max = flex.max(self.peaks.heights)
    if (len(self.holes.heights) > 0) :
        # Holes are negative, so their extreme is the minimum height.
        hole_max = flex.min(self.holes.heights)
    n_non_water_anom_peaks = None
    # Attribute may be absent on older objects, hence getattr.
    if (getattr(self, "non_water_anom_peaks", None) is not None) :
        n_non_water_anom_peaks = len(self.non_water_anom_peaks)
    # Peak/hole counts at the map cutoff and at +3/+6 sigma above it.
    return summary(
        n_peaks_1=(self.peaks.heights > self.map_cutoff).count(True),
        n_peaks_2=(self.peaks.heights > self.map_cutoff + 3).count(True),
        n_peaks_3=(self.peaks.heights > self.map_cutoff + 6).count(True),
        n_holes_1=(self.holes.heights < -self.map_cutoff).count(True),
        n_holes_2=(self.holes.heights < -self.map_cutoff - 3).count(True),
        n_holes_3=(self.holes.heights < -self.map_cutoff - 6).count(True),
        peak_max=peak_max,
        hole_max=hole_max,
        n_anom_peaks=n_anom_peaks,
        n_water_peaks=n_water_peaks,
        n_water_anom_peaks=n_water_anom_peaks,
        map_cutoff=self.map_cutoff,
        anom_map_cutoff=self.anom_map_cutoff,
        n_non_water_anom_peaks=n_non_water_anom_peaks)
def get_model_stat(file_name):
    """Return (min nonbonded distance, max bond deviation, max angle
    deviation) for the model in file_name.

    The minimum distance is taken over all atom pairs that are NOT joined
    by a simple bond proxy.
    """
    grm = restraints.get_grm(file_name = file_name)
    r = model_statistics.geometry(
        pdb_hierarchy = grm.pdb_hierarchy,
        restraints_manager = grm.restraints_manager,
        molprobity_scores = True)
    distances = flex.double()
    xyz = grm.pdb_hierarchy.atoms().extract_xyz()
    bond_proxies_simple = grm.restraints_manager.geometry.pair_proxies(
        sites_cart = xyz).bond_proxies.simple
    # Precompute the set of bonded pairs once. The original rescanned every
    # bond proxy for every atom pair, i.e. O(n_atoms^2 * n_proxies).
    bonded_pairs = set()
    for proxy in bond_proxies_simple:
        i_seq, j_seq = sorted(proxy.i_seqs)
        bonded_pairs.add((i_seq, j_seq))
    for i, site_i in enumerate(xyz):
        for j, site_j in enumerate(xyz):
            if(j>i):
                if((i, j) not in bonded_pairs):
                    dist_ij = math.sqrt(
                        (site_i[0]-site_j[0])**2+
                        (site_i[1]-site_j[1])**2+
                        (site_i[2]-site_j[2])**2)
                    distances.append(dist_ij)
    min_nonbonded_distance = flex.min(distances)
    # bond(rmsd), bond(max), angle(rmsd), angle(max), etc..
    #print r.b_mean, r.b_max, r.a_min, r.a_max, r.clashscore, min_nonbonded_distance
    return min_nonbonded_distance, r.b_max , r.a_max
def get_mean_statistic_for_resolution (d_min, stat_type, range=0.2, out=None) :
    """Report min/max/mean of a POLYGON database statistic over structures
    whose high resolution lies within +/- range of d_min; returns the mean.

    Note: the parameter name 'range' shadows the builtin but is part of the
    public keyword interface, so it is kept.
    """
    if (out is None) : out = sys.stdout
    from scitbx.array_family import flex
    pkl_file = libtbx.env.find_in_repositories(
        relative_path = "chem_data/polygon_data/all_mvd.pickle",
        test = os.path.isfile)
    db = easy_pickle.load(pkl_file)
    all_d_min = db['high_resolution']
    stat_values = db[stat_type]
    values_for_range = flex.double()
    for (d_, v_) in zip(all_d_min, stat_values) :
        # Skip entries with non-numeric resolution or statistic values.
        try :
            d = float(d_)
            v = float(v_)
        except ValueError :
            continue
        else :
            if (d > (d_min - range)) and (d < (d_min + range)) :
                values_for_range.append(v)
    h = flex.histogram(values_for_range, n_slots=10)
    print >> out, " %s for d_min = %.3f - %.3f A" % (stat_names[stat_type],
        d_min-range, d_min+range)
    # Renamed locals so the builtins min/max are not shadowed.
    min_value = flex.min(values_for_range)
    max_value = flex.max(values_for_range)
    mean = flex.mean(values_for_range)
    print >> out, " count: %d" % values_for_range.size()
    print >> out, " min: %.2f" % min_value
    print >> out, " max: %.2f" % max_value
    print >> out, " mean: %.2f" % mean
    print >> out, " histogram of values:"
    h.show(prefix=" ")
    return mean
def __init__(self, evaluator, population_size=50, f=None, cr=0.9, eps=1e-2,
             n_cross=1, max_iter=10000, monitor_cycle=200, out=None,
             show_progress=False, show_progress_nth_cycle=1,
             insert_solution_vector=None, dither_constant=0.4):
    """Run differential-evolution optimization of evaluator.

    The optimization runs to completion inside __init__; afterwards the
    best vector is stored in evaluator.x and self.best_vector/best_score.
    """
    self.dither=dither_constant            # dithering amplitude for f
    self.show_progress=show_progress
    self.show_progress_nth_cycle=show_progress_nth_cycle
    self.evaluator = evaluator
    self.population_size = population_size
    self.f = f                             # mutation weight (None -> dithered)
    self.cr = cr                           # crossover probability
    self.n_cross = n_cross
    self.max_iter = max_iter
    self.monitor_cycle = monitor_cycle     # convergence-check interval
    self.vector_length = evaluator.n
    self.eps = eps                         # convergence tolerance
    self.population = []
    self.seeded = False
    if insert_solution_vector is not None:
        assert len( insert_solution_vector )==self.vector_length
        # Truthy marker: the seed vector itself is stored here.
        self.seeded = insert_solution_vector
    for ii in xrange(self.population_size):
        self.population.append( flex.double(self.vector_length,0) )
    self.scores = flex.double(self.population_size,1000)
    self.optimize()
    self.best_score = flex.min( self.scores )
    self.best_vector = self.population[ flex.min_index( self.scores ) ]
    self.evaluator.x = self.best_vector
    if self.show_progress:
        self.evaluator.print_status(
            flex.min(self.scores),
            flex.mean(self.scores),
            self.population[ flex.min_index( self.scores ) ],
            'Final')
def collect(O, rmsd_t_c, param_values):
    """Classify a (t, c) pair of rmsd arrays by which member has the lower
    minimum and the lower mean, and file param_values under the resulting
    two-letter key (e.g. "tc") in O.data."""
    lows = [flex.min(arr) for arr in rmsd_t_c]
    avgs = [flex.mean(arr) for arr in rmsd_t_c]
    min_tag = "t" if lows[0] < lows[1] else "c"
    mean_tag = "t" if avgs[0] < avgs[1] else "c"
    O.data[min_tag + mean_tag].append(param_values)
def fsc_model_map(xray_structure, map, d_min, log=sys.stdout, radius=2.,
                  n_bins=30, prefix=""):
    """Print model-vs-map correlation, overall and per resolution bin.

    In P1 (space group number 1) an additional "masked" correlation is
    reported using only density within `radius` of the atoms.
    """
    sgn = xray_structure.crystal_symmetry().space_group().type().number()
    f_calc = xray_structure.structure_factors(d_min=d_min).f_calc()
    def compute_mc(f_calc, map):
        # Map coefficients back-computed from the real-space map.
        return f_calc.structure_factors_from_map(
            map = map,
            use_scale = True,
            anomalous_flag = False,
            use_sg = False)
    sites_frac = xray_structure.sites_frac()
    # Atom mask only available in P1.
    if(sgn==1):
        mask = cctbx_maptbx_ext.mask(
            sites_frac = sites_frac,
            unit_cell = xray_structure.unit_cell(),
            n_real = map.all(),
            mask_value_inside_molecule = 1,
            mask_value_outside_molecule = 0,
            radii = flex.double(sites_frac.size(), radius))
    mc = compute_mc(f_calc=f_calc, map=map)
    if(sgn==1):
        mc_masked = compute_mc(f_calc=f_calc, map=map*mask)
        del mask
    print >> log, prefix, "Overall (entire box): %7.4f"%\
        f_calc.map_correlation(other = mc)
    if(sgn==1):
        cc = f_calc.map_correlation(other = mc_masked)
        if(cc is not None):
            print >> log, prefix, "Around atoms (masked): %7.4f"%cc
    dsd = f_calc.d_spacings().data()
    # Use fewer, fuller bins for small data sets.
    if(dsd.size()>1500): f_calc.setup_binner(n_bins = n_bins)
    else: f_calc.setup_binner(reflections_per_bin = dsd.size())
    if(sgn==1):
        print >> log, prefix, "Bin# Resolution (A) CC CC(masked)"
    else:
        print >> log, prefix, "Bin# Resolution (A) CC"
    fmt1="%2d: %7.3f-%-7.3f %7.4f"
    for i_bin in f_calc.binner().range_used():
        sel = f_calc.binner().selection(i_bin)
        d = dsd.select(sel)
        d_min = flex.min(d)
        d_max = flex.max(d)
        n = d.size()
        fc = f_calc.select(sel)
        fo = mc.select(sel)
        cc = fc.map_correlation(other = fo)
        if(sgn==1):
            fo_masked = mc_masked.select(sel)
            cc_masked = fc.map_correlation(other = fo_masked)
            if(cc_masked is not None and cc is not None):
                fmt2="%2d: %7.3f-%-7.3f %7.4f %7.4f"
                print >> log, prefix, fmt2%(i_bin, d_max, d_min, cc, cc_masked)
            else:
                # Correlation undefined for this bin (e.g. empty selection).
                fmt2="%2d: %7.3f-%-7.3f %s %s"
                print >> log, prefix, fmt2%(i_bin, d_max, d_min, "none", "none")
        else:
            print >> log, prefix, fmt1%(i_bin, d_max, d_min, cc)
def optimize(self):
    """Main differential-evolution loop.

    Evolves the population until (a) the best score stops improving over a
    monitor cycle, (b) the population's relative score spread drops below
    eps, or (c) max_iter generations have run.
    """
    # initialise the population please
    self.make_random_population()
    # score the population please
    self.score_population()
    converged = False
    monitor_score = flex.min( self.scores )
    self.count = 0
    while not converged:
        self.evolve()
        location = flex.min_index( self.scores )
        if self.show_progress:
            if self.count%self.show_progress_nth_cycle==0:
                # make here a call to a custom print_status function in the evaluator function
                # the function signature should be (min_target, mean_target, best vector)
                self.evaluator.print_status(
                    flex.min(self.scores),
                    flex.mean(self.scores),
                    self.population[ flex.min_index( self.scores ) ],
                    self.count)
        self.count += 1
        # Periodic convergence check: has the best score improved by >= eps?
        if self.count%self.monitor_cycle==0:
            if (monitor_score-flex.min(self.scores) ) < self.eps:
                converged = True
            else:
                monitor_score = flex.min(self.scores)
        # Relative spread of scores: converged when population has collapsed.
        rd = (flex.mean(self.scores) - flex.min(self.scores) )
        rd = rd*rd/(flex.min(self.scores)*flex.min(self.scores) + self.eps )
        if ( rd < self.eps ):
            converged = True
        if self.count>=self.max_iter:
            converged =True
def process_file(file_object, n_slots, data_min, data_max, format_cutoffs):
    """Read one float per line from file_object and print its histogram.

    data_min/data_max default to the observed extremes when passed as None.
    """
    data = flex.double()
    for line in file_object.read().splitlines():
        data.append(float(line))
    print "total number of data points:", data.size()
    if (data_min is None): data_min = flex.min(data)
    if (data_max is None): data_max = flex.max(data)
    flex.histogram(
        data=data, n_slots=n_slots, data_min=data_min, data_max=data_max).show(
            format_cutoffs=format_cutoffs)
def test_structure_generator():
    """Check that randomized copies of a structure keep a minimum
    separation: every pairwise translation distance must exceed the
    generator's min_separation plus two molecular radii."""
    p = pdb.input(source_info='string',lines=test_pdb)
    sg = structure_generator()
    sg.add_species(p,100)
    sg.randomize()
    t = sg.translations[0]
    d = flex.double()
    # All pairwise distances between the 100 placed copies.
    for i in xrange(len(t)):
        for j in xrange(i+1,len(t)):
            d.append( (flex.double(t[i]) - flex.double(t[j])).norm() )
    assert ( flex.min(d) > (sg.min_separation + 2.0*sg.species[0].radius) )
def normalize_start_map(self):
    """Build the initial density according to self.start_map and return it
    normalized to unit sum.

    Modes: "flat" (uniform 1/N map), "lde" (clamp values below 1% of the
    maximum up to that floor), "min_shifted" (shift so the minimum is 0).
    Any other mode raises Sorry.
    """
    mode = self.start_map
    rho = self.rho_obs.deep_copy()
    if(mode == "flat"):
        rho = flex.double(flex.grid(self.n_real), 1./self.N)
    elif(mode == "lde"):
        floor = flex.max(rho)/100.
        rho = rho.set_selected(rho <= floor, floor)
    elif(mode == "min_shifted"):
        rho = rho - flex.min(rho)
    else:
        raise Sorry("Invalid initial map modification choice.")
    return rho / flex.sum(rho)
def interpolate(self, x_array, y_array):
    """Resample (x_array, y_array) onto self.target_x.

    For each target x within the user data range, finds the first user
    point at or beyond it and interpolates (two-point at the edges,
    parabolic in the interior). Returns (x, y, (user start/end indices),
    (target start/end indices)) with x, y as flex.double over the covered
    target range.
    """
    # NOTE(review): index_array, break_again and n are computed but unused.
    index_array = []
    result_array = []
    start_index_user = None
    end_index_user = None
    start_index_target = None
    end_index_target = None
    user_min = flex.min( x_array )
    user_max = flex.max( x_array )
    for jj,x in enumerate(self.target_x):
        this_index = None
        break_again = False
        for index,this_x in enumerate(x_array):
            # First user point at or beyond the target x, if x is in range.
            if this_x - x >= 0:
                if x >= user_min:
                    if x <= user_max:
                        this_index = index
                        if start_index_user is None:
                            start_index_user = this_index
                        if start_index_target is None:
                            start_index_target = jj
                        end_index_user = this_index
                        end_index_target = jj
                        break
        index_array.append( this_index )
        y = None
        if this_index is not None:
            if this_index == 0:
                # Left edge: linear interpolation from the first two points.
                y = self.two_point_interpolate( x,
                    x_array[this_index ], y_array[this_index ],
                    x_array[this_index+1], y_array[this_index+1] )
            elif this_index == len(x_array)-1:
                # Right edge: linear interpolation from the last two points.
                y = self.two_point_interpolate( x,
                    x_array[this_index-1], y_array[this_index-1],
                    x_array[this_index], y_array[this_index] )
            else:
                # Interior: parabola through the three surrounding points.
                y = self.parabolic_interpolate( x,
                    x_array[this_index-1], y_array[this_index-1],
                    x_array[this_index ], y_array[this_index ],
                    x_array[this_index+1], y_array[this_index+1] )
            result_array.append( y )
    n = len(result_array)
    x = flex.double(self.target_x[start_index_target:end_index_target+1])
    y = flex.double(result_array)
    return x,y,(start_index_user,end_index_user),(start_index_target,end_index_target)
def run_simulation(
        out, six_dof_type, r_is_qr, mersenne_twister, n_dynamics_steps, delta_t):
    """Run a six-degree-of-freedom dynamics simulation and report energy
    conservation; returns (sim, label, moved sites, total energies,
    relative energy range)."""
    sim = six_dof_simulation(
        six_dof_type=six_dof_type,
        r_is_qr=r_is_qr,
        mersenne_twister=mersenne_twister)
    sim_label = 'six_dof(type="%s", r_is_qr=%s)' % (
        six_dof_type, str(sim.J.r_is_qr))
    sim.check_d_pot_d_q()
    sites_moved = [sim.sites_moved()]
    e_pots = flex.double([sim.e_pot])
    e_kins = flex.double([sim.e_kin])
    for i_step in range(n_dynamics_steps):
        sim.dynamics_step(delta_t=delta_t)
        sites_moved.append(sim.sites_moved())
        e_pots.append(sim.e_pot)
        e_kins.append(sim.e_kin)
    e_tots = e_pots + e_kins
    sim.check_d_pot_d_q()
    print(sim_label, file=out)
    print("e_pot min, max:", min(e_pots), max(e_pots), file=out)
    print("e_kin min, max:", min(e_kins), max(e_kins), file=out)
    print("e_tot min, max:", min(e_tots), max(e_tots), file=out)
    print("start e_tot:", e_tots[0], file=out)
    print("final e_tot:", e_tots[-1], file=out)
    ave = flex.sum(e_tots) / e_tots.size()
    # Energy drift relative to the average total energy.
    range_ = flex.max(e_tots) - flex.min(e_tots)
    relative_range = range_ / ave
    print("ave:", ave, file=out)
    print("range:", range_, file=out)
    print("relative range:", relative_range, file=out)
    print(file=out)
    out.flush()
    if (out is sys.stdout):
        # Sanitize the label into a filename and dump xy traces for
        # external plotting; "&" separates data sets.
        l = sim_label \
            .replace(' ', "") \
            .replace('"', "") \
            .replace("(", "_") \
            .replace(")", "_") \
            .replace(",", "_")
        f = open("tmp_%02d_%02d_%s.xy" % (plot_prefix, plot_number[0], l), "w")
        for es in [e_pots, e_kins, e_tots]:
            for e in es:
                print(e, file=f)
            print("&", file=f)
        f.close()
        plot_number[0] += 1
    return sim, sim_label, sites_moved, e_tots, relative_range
def __init__(self, rs_vectors, percentile=0.05):
    """Estimate the maximum unit cell length from reciprocal-space spots.

    Uses nearest-neighbor distances in reciprocal space: the direct-space
    estimate is the larger of the most probable neighbor distance and the
    given percentile distance, times a 1.5 safety margin; stored in
    self.max_cell.
    """
    from scitbx.array_family import flex
    NEAR = 10
    self.NNBIN = 5 # target number of neighbors per histogram bin
    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor
    query = flex.double()
    for spot in rs_vectors: # spots, in reciprocal space xyz
        query.append(spot[0])
        query.append(spot[1])
        query.append(spot[2])
    assert len( rs_vectors) > NEAR # Can't do nearest neighbor with too few spots
    IS_adapt = AnnAdaptor(data=query, dim=3, k=1)
    IS_adapt.query(query)
    direct = flex.double()
    for i in range(len(rs_vectors)):
        # Direct-space distance = 1 / reciprocal-space distance.
        direct.append(1.0 / math.sqrt(IS_adapt.distances[i]))
    # determine the most probable nearest neighbor distance (direct space)
    hst = flex.histogram(direct, n_slots=int(len(rs_vectors) / self.NNBIN))
    centers = hst.slot_centers()
    islot = hst.slots()
    highest_bin_height = flex.max(islot)
    most_probable_neighbor = centers[list(islot).index(highest_bin_height)]
    if False: # to print out the histogramming analysis
        smin, smax = flex.min(direct), flex.max(direct)
        stats = flex.mean_and_variance(direct)
        import sys
        out = sys.stdout
        print("     range:     %6.2f - %.2f" % (smin, smax), file=out)
        print("     mean:      %6.2f +/- %6.2f on N = %d" % (
            stats.mean(), stats.unweighted_sample_standard_deviation(),
            direct.size()), file=out)
        hst.show(f=out, prefix="    ", format_cutoffs="%6.2f")
        print("", file=out)
    # determine the 5th-percentile direct-space distance
    perm = flex.sort_permutation(direct, reverse=True)
    percentile = direct[perm[int(percentile * len(rs_vectors))]]
    MAXTOL = 1.5 # Margin of error for max unit cell estimate
    self.max_cell = max(MAXTOL * most_probable_neighbor, MAXTOL * percentile)
    if False:
        self.plot(direct)
def __init__(self, pdb_hierarchy, xray_structure, params, out=sys.stdout):
  """Collect per-residue average isotropic B-factors, grouped by chain.

  params.average_b_over selects which atoms contribute: "residue" (all),
  "mainchain" (protein N/C/CA/O only), otherwise sidechain atoms.
  Populates self.chains and self.residues (list of residue_info per chain).
  """
  from cctbx import adptbx
  from scitbx.array_family import flex
  self.plot_range = params.plot_range
  self.chains = []
  self.residues = []
  # isotropic-equivalent B for every atom in the structure
  b_isos = xray_structure.extract_u_iso_or_u_equiv() * adptbx.u_as_b(1.0)
  occ = pdb_hierarchy.atoms().extract_occ()
  model = pdb_hierarchy.models()[0]
  for chain in model.chains():
    main_conf = chain.conformers()[0]
    is_na = main_conf.is_na()
    is_protein = main_conf.is_protein()
    if (not is_protein) and (not is_na):
      # modernized from a Python 2 "print >> out" statement
      print("Skipping chain '%s' - not protein or DNA/RNA." % chain.id,
            file=out)
      continue
    self.chains.append(chain.id)
    self.residues.append([])
    for residue_group in chain.residue_groups():
      n_conformers = len(residue_group.atom_groups())
      rg_i_seqs = residue_group.atoms().extract_i_seq()
      rg_occ = residue_group.atoms().extract_occ()
      if (params.average_b_over == "residue"):
        use_i_seqs = rg_i_seqs
      elif (params.average_b_over == "mainchain"):
        use_i_seqs = []
        if (is_protein):
          for j_seq, atom in enumerate(residue_group.atoms()):
            #alab = atom.fetch_labels()
            if (atom.name in [" N ", " C ", " CA ", " O "]):
              use_i_seqs.append(rg_i_seqs[j_seq])
        else:
          raise Sorry(
            "Mainchain-only mode not supported for nucleic acids.")
      else:
        # sidechain mode: everything that is not a mainchain atom
        use_i_seqs = []
        if (is_protein):
          for j_seq, atom in enumerate(residue_group.atoms()):
            if (not atom.name in [" N ", " C ", " CA ", " O "]):
              use_i_seqs.append(rg_i_seqs[j_seq])
      if (len(use_i_seqs) > 0):
        # partial occupancy only flagged when there is a single conformer
        has_partocc = ((flex.min(occ.select(use_i_seqs)) < 1.0) and
                       (n_conformers == 1))
        res_info = residue_info(
          chain_id=chain.id,
          resseq=residue_group.resseq_as_int(),
          icode=residue_group.icode,
          has_altconf=(n_conformers > 1),
          has_partocc=has_partocc,
          avg_b=flex.mean(b_isos.select(use_i_seqs)))
        self.residues[-1].append(res_info)
def exercise_eigensystem():
  """Exercise tntbx.eigensystem.real on zero, diagonal and random
  symmetric matrices; checks eigenvalue ordering and m*x = lambda*x.

  Modernized: xrange -> range (Python-2-only builtin); avoid the
  single-letter name "l" (easily confused with 1).
  """
  #random.seed(0)
  for n in range(1, 10):
    m = flex.double(flex.grid(n, n))
    # all-zero matrix: every eigenvalue must be zero
    s = tntbx.eigensystem.real(m)
    assert approx_equal(tuple(s.values()), [0] * n)
    v = s.vectors()
    for i in range(n):
      for j in range(n):
        x = 0
        if (i == j):
          x = 1
        #assert approx_equal(v[(i,j)], x)
    # diagonal matrix holding a permutation of 0..n-1
    v = []
    for i in range(n):
      j = (i * 13 + 17) % n
      v.append(j)
      m[i * (n + 1)] = j
    s = tntbx.eigensystem.real(m)
    if (n == 3):
      # compact symmetric (upper-triangle) form must agree
      ss = tntbx.eigensystem.real((m[0], m[4], m[8], m[1], m[2], m[5]))
      assert approx_equal(s.values(), ss.values())
      assert approx_equal(s.vectors(), ss.vectors())
    v.sort()
    v.reverse()
    # eigenvalues are reported in decreasing order
    assert approx_equal(s.values(), v)
    if (n > 1):
      assert approx_equal(flex.min(s.vectors()), 0)
      assert approx_equal(flex.max(s.vectors()), 1)
      assert approx_equal(flex.sum(s.vectors()), n)
    # random symmetric matrices
    for t in range(10):
      for i in range(n):
        for j in range(i, n):
          m[i * n + j] = random.random() - 0.5
          if (i != j):
            m[j * n + i] = m[i * n + j]
      s = tntbx.eigensystem.real(m)
      if (n == 3):
        ss = tntbx.eigensystem.real(
          (m[0], m[4], m[8], m[1], m[2], m[5]))
        assert approx_equal(s.values(), ss.values())
        assert approx_equal(s.vectors(), ss.vectors())
      v = list(s.values())
      v.sort()
      v.reverse()
      assert list(s.values()) == v
      # verify m * x = lambda * x for every eigenpair
      for i in range(n):
        eigval = s.values()[i]
        x = s.vectors()[i * n:i * n + n]
        mx = matrix_mul(m, n, n, x, n, 1)
        lx = [e * eigval for e in x]
        assert approx_equal(mx, lx)
  m = (1.4573362052597449, 1.7361052947659894, 2.8065584999742659,
       -0.5387293498219814, -0.018204949672480729, 0.44956507395617257)
def run_simulation(
      out, six_dof_type, r_is_qr, mersenne_twister, n_dynamics_steps, delta_t):
  """Run a six_dof dynamics simulation and report energy statistics.

  Modernized: Python 2 "print >>" statements and xrange replaced with the
  Python 3 forms already used elsewhere in this file; the local "range"
  (which shadowed the builtin) renamed to range_; plot file opened via a
  context manager.

  Returns (sim, sim_label, sites_moved, e_tots, relative_range).
  """
  sim = six_dof_simulation(
    six_dof_type=six_dof_type,
    r_is_qr=r_is_qr,
    mersenne_twister=mersenne_twister)
  sim_label = 'six_dof(type="%s", r_is_qr=%s)' % (
    six_dof_type, str(sim.J.r_is_qr))
  sim.check_d_pot_d_q()
  sites_moved = [sim.sites_moved()]
  e_pots = flex.double([sim.e_pot])
  e_kins = flex.double([sim.e_kin])
  for i_step in range(n_dynamics_steps):
    sim.dynamics_step(delta_t=delta_t)
    sites_moved.append(sim.sites_moved())
    e_pots.append(sim.e_pot)
    e_kins.append(sim.e_kin)
  e_tots = e_pots + e_kins
  sim.check_d_pot_d_q()
  print(sim_label, file=out)
  print("e_pot min, max:", min(e_pots), max(e_pots), file=out)
  print("e_kin min, max:", min(e_kins), max(e_kins), file=out)
  print("e_tot min, max:", min(e_tots), max(e_tots), file=out)
  print("start e_tot:", e_tots[0], file=out)
  print("final e_tot:", e_tots[-1], file=out)
  ave = flex.sum(e_tots) / e_tots.size()
  range_ = flex.max(e_tots) - flex.min(e_tots)
  relative_range = range_ / ave
  print("ave:", ave, file=out)
  print("range:", range_, file=out)
  print("relative range:", relative_range, file=out)
  print(file=out)
  out.flush()
  if (out is sys.stdout):
    # sanitize the label for use in a file name
    label = sim_label \
      .replace(' ', "") \
      .replace('"', "") \
      .replace("(", "_") \
      .replace(")", "_") \
      .replace(",", "_")
    with open("tmp_%02d_%02d_%s.xy" % (
        plot_prefix, plot_number[0], label), "w") as f:
      for es in [e_pots, e_kins, e_tots]:
        for e in es:
          print(e, file=f)
        print("&", file=f)  # series separator for the plotting tool
    plot_number[0] += 1
  return sim, sim_label, sites_moved, e_tots, relative_range
def get_binned_intensities(self, n_bins=100):
  """
  Using self.ISIGI, bin the intensities using the following procedure:
  1) Find the minimum and maximum intensity values.
  2) Divide max-min by n_bins. This is the bin step size
  The effect is that each bin spans an equal intensity range.
  @param n_bins number of bins to use.
  @return a tuple with an array of selections for each bin and an array
  of median intensity values for each bin.
  """
  print("Computing intensity bins.", end=' ', file=self.log)
  ISIGI = self.scaler.ISIGI
  meanI = ISIGI['mean_scaled_intensity']
  sels = []
  binned_intensities = []
  if True:
    # intensity range per bin is the same
    min_meanI = flex.min(meanI)
    step = (flex.max(meanI) - min_meanI) / n_bins
    print("Bin size:", step, file=self.log)
    # bin index per observation; -1 marks "not yet assigned"
    self.bin_indices = flex.int(len(ISIGI), -1)
    for i in range(n_bins):
      if i + 1 == n_bins:
        # last bin is open-ended so the maximum observation is included
        sel = (meanI >= (min_meanI + step * i))
      else:
        sel = (meanI >= (min_meanI + step * i)) & \
              (meanI < (min_meanI + step * (i + 1)))
      if sel.all_eq(False):
        continue  # skip empty bins entirely
      sels.append(sel)
      self.bin_indices.set_selected(sel, len(sels) - 1)
      # record the midpoint intensity of this bin
      binned_intensities.append((step / 2 + step * i) + min_meanI)
    # every observation must have been assigned to some bin
    assert (self.bin_indices == -1).count(True) == False
  else:
    # n obs per bin is the same (equal-count binning)
    sorted_meanI = meanI.select(flex.sort_permutation(meanI))
    bin_size = len(meanI) / n_bins
    for i in range(n_bins):
      bin_min = sorted_meanI[int(i * bin_size)]
      sel = meanI >= bin_min
      if i + 1 == n_bins:
        bin_max = sorted_meanI[-1]
      else:
        bin_max = sorted_meanI[int((i + 1) * bin_size)]
        # NOTE(review): upper bound applied to non-final bins only, so the
        # final bin stays open-ended (mirrors the equal-range branch above)
        sel &= meanI < bin_max
      sels.append(sel)
      binned_intensities.append(bin_min + ((bin_max - bin_min) / 2))
  for i, (sel, intensity) in enumerate(zip(sels, binned_intensities)):
    print("Bin %02d, number of observations: % 10d, midpoint intensity: %f" %
          (i, sel.count(True), intensity), file=self.log)
  return sels, binned_intensities
def run(args=None):
  """Visualize the parallax correction of a single-panel detector.

  Loads one datablock from the command-line arguments, computes per-pixel
  (x, y) corrections between the parallax-corrected and naive pixel->mm
  mappings, and displays them side by side with a shared color scale.
  """
  dxtbx.util.encode_output_as_utf8()
  datablocks = DataBlockFactory.from_args(args or sys.argv[1:])
  assert len(datablocks) == 1
  detectors = datablocks[0].unique_detectors()
  assert len(detectors) == 1
  detector = detectors[0]
  assert len(detector) == 1  # single-panel detectors only
  px_mm = detector[0].get_px_mm_strategy()
  assert isinstance(px_mm, ParallaxCorrectedPxMmStrategy)
  print("Mu: %f mm^-1 " % px_mm.mu())
  print("t0: %f mm" % px_mm.t0())
  # image size comes as (fast, slow); reverse to (slow, fast) for the grid
  image_size = detector[0].get_image_size()[::-1]
  xcorr = flex.double(flex.grid(image_size))
  ycorr = flex.double(flex.grid(image_size))
  pixel_size = detector[0].get_pixel_size()
  for j in range(xcorr.all()[0]):
    for i in range(xcorr.all()[1]):
      # corrected position vs naive pixel*pixel_size position
      x1, y1 = detector[0].pixel_to_millimeter((i, j))
      x0, y0 = i * pixel_size[0], j * pixel_size[1]
      xcorr[j, i] = x1 - x0
      ycorr[j, i] = y1 - y0
  # shared color range across both correction maps
  vmin = min([flex.min(xcorr), flex.min(ycorr)])
  vmax = max([flex.max(xcorr), flex.max(ycorr)])
  fig, ax = pylab.subplots()
  pylab.subplot(121)
  pylab.imshow(xcorr.as_numpy_array(), interpolation="none",
               vmin=vmin, vmax=vmax)
  pylab.subplot(122)
  im = pylab.imshow(ycorr.as_numpy_array(), interpolation="none",
                    vmin=vmin, vmax=vmax)
  # one colorbar for both subplots
  fig.subplots_adjust(right=0.8)
  cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
  fig.colorbar(im, cax=cax)
  pylab.show()
def exercise_eigensystem():
  """Exercise tntbx.eigensystem.real on zero, diagonal and random
  symmetric matrices; checks eigenvalue ordering and m*x = lambda*x.

  Modernized: xrange -> range (Python-2-only builtin); avoid the
  single-letter name "l" (easily confused with 1).
  """
  #random.seed(0)
  for n in range(1, 10):
    m = flex.double(flex.grid(n, n))
    # all-zero matrix: every eigenvalue must be zero
    s = tntbx.eigensystem.real(m)
    assert approx_equal(tuple(s.values()), [0] * n)
    v = s.vectors()
    for i in range(n):
      for j in range(n):
        x = 0
        if (i == j):
          x = 1
        #assert approx_equal(v[(i,j)], x)
    # diagonal matrix holding a permutation of 0..n-1
    v = []
    for i in range(n):
      j = (i * 13 + 17) % n
      v.append(j)
      m[i * (n + 1)] = j
    s = tntbx.eigensystem.real(m)
    if (n == 3):
      # compact symmetric (upper-triangle) form must agree
      ss = tntbx.eigensystem.real((m[0], m[4], m[8], m[1], m[2], m[5]))
      assert approx_equal(s.values(), ss.values())
      assert approx_equal(s.vectors(), ss.vectors())
    v.sort()
    v.reverse()
    # eigenvalues are reported in decreasing order
    assert approx_equal(s.values(), v)
    if (n > 1):
      assert approx_equal(flex.min(s.vectors()), 0)
      assert approx_equal(flex.max(s.vectors()), 1)
      assert approx_equal(flex.sum(s.vectors()), n)
    # random symmetric matrices
    for t in range(10):
      for i in range(n):
        for j in range(i, n):
          m[i * n + j] = random.random() - 0.5
          if (i != j):
            m[j * n + i] = m[i * n + j]
      s = tntbx.eigensystem.real(m)
      if (n == 3):
        ss = tntbx.eigensystem.real((m[0], m[4], m[8], m[1], m[2], m[5]))
        assert approx_equal(s.values(), ss.values())
        assert approx_equal(s.vectors(), ss.vectors())
      v = list(s.values())
      v.sort()
      v.reverse()
      assert list(s.values()) == v
      # verify m * x = lambda * x for every eigenpair
      for i in range(n):
        eigval = s.values()[i]
        x = s.vectors()[i * n:i * n + n]
        mx = matrix_mul(m, n, n, x, n, 1)
        lx = [e * eigval for e in x]
        assert approx_equal(mx, lx)
  m = (1.4573362052597449, 1.7361052947659894, 2.8065584999742659,
       -0.5387293498219814, -0.018204949672480729, 0.44956507395617257)
def nsd(self, moving, d_moving=None):
  """Normalized spatial discrepancy between self.fixed and moving sites.

  For each site in one set, take the minimum distance to the other set,
  accumulate the squared minima, normalize, and return the root mean.

  BUG FIX: the fixed->moving accumulation added the plain distance (dd)
  while the moving->fixed accumulation added dd*dd; the NSD definition
  uses squared minimum distances on both sides, so both now add dd*dd.
  """
  if self.d_moving is None:
    self.d_moving = self.get_mean_distance(moving)
  if d_moving is not None:
    self.d_moving = d_moving
  # loop over all sites in fixed, find the minimum for each site
  tot_rho_mf = 0
  tot_rho_fm = 0
  for site in moving:
    dd = self.fixed - site
    dd = flex.min(dd.norms())
    tot_rho_mf += dd * dd
  for site in self.fixed:
    dd = moving - site
    dd = flex.min(dd.norms())
    tot_rho_fm += dd * dd  # was: tot_rho_fm += dd (inconsistent with above)
  tot_rho_fm = tot_rho_fm / (self.fixed.size() * self.d_fixed)
  tot_rho_mf = tot_rho_mf / (moving.size() * self.d_moving)
  result = smath.sqrt((tot_rho_fm + tot_rho_mf) / 2.0)
  return result
def weighted_means(self):
  """Boltzmann-weighted means and standard deviations of rg and i0.

  Shifts self.scores so the best score is zero (this mutation persists),
  weights each sample by exp(-score/2), and returns
  (mean_rg, sd_rg, mean_i0, sd_i0, mean_score) where mean_score is the
  unweighted mean of the original (unshifted) scores.
  """
  mean_score = flex.mean(self.scores)
  # shift so the minimum score is zero; intentionally stored back on self
  self.scores = self.scores - flex.min(self.scores)
  weights = flex.exp(-0.50 * self.scores)
  weight_sum = 1e-12 + flex.sum(weights)  # guard against all-zero weights

  def wmean(values):
    # weighted first moment of values
    return flex.sum(weights * values) / weight_sum

  mean_rg = wmean(self.rg)
  mean_i0 = wmean(self.i0)
  sd_rg = math.sqrt(wmean(self.rg * self.rg) - mean_rg * mean_rg)
  sd_i0 = math.sqrt(wmean(self.i0 * self.i0) - mean_i0 * mean_i0)
  return mean_rg, sd_rg, mean_i0, sd_i0, mean_score
def get_binned_intensities(self, n_bins=100):
  """
  Using self.ISIGI, bin the intensities using the following procedure:
  1) Find the minimum and maximum intensity values.
  2) Divide max-min by n_bins. This is the bin step size
  The effect is that each bin spans an equal intensity range.
  @param n_bins number of bins to use.
  @return a tuple with an array of selections for each bin and an array
  of median intensity values for each bin.

  BUG FIX: bin membership previously used a strict ">" lower bound and a
  strict "<" upper bound on every bin, so observations exactly at the
  minimum intensity (and at the maximum) fell into no bin.  Bins now use
  an inclusive ">=" lower bound and the final bin is open-ended, matching
  the companion implementation elsewhere in this file.
  """
  print("Computing intensity bins.", end=' ', file=self.log)
  all_mean_Is = flex.double()
  only_means = flex.double()
  for hkl_id in range(self.scaler.n_refl):
    hkl = self.scaler.miller_set.indices()[hkl_id]
    if hkl not in self.scaler.ISIGI:
      continue
    n = len(self.scaler.ISIGI[hkl])
    # get scaled intensities
    intensities = flex.double(
      [self.scaler.ISIGI[hkl][i][0] for i in range(n)])
    meanI = flex.mean(intensities)
    only_means.append(meanI)
    # repeat the mean once per contributing observation
    all_mean_Is.extend(flex.double([meanI] * n))
  min_meanI = flex.min(only_means)
  step = (flex.max(only_means) - min_meanI) / n_bins
  print("Bin size:", step, file=self.log)
  sels = []
  binned_intensities = []
  for i in range(n_bins):
    lower = min_meanI + step * i
    if i + 1 == n_bins:
      # last bin open-ended so the maximum observation is included
      sel = all_mean_Is >= lower
    else:
      sel = (all_mean_Is >= lower) & (all_mean_Is < lower + step)
    if sel.all_eq(False):
      continue
    sels.append(sel)
    binned_intensities.append((step / 2 + step * i) + min_meanI)
  for i, (sel, intensity) in enumerate(zip(sels, binned_intensities)):
    print("Bin %02d, number of observations: % 10d, midpoint intensity: %f" %
          (i, sel.count(True), intensity), file=self.log)
  return sels, binned_intensities
def check_adp(u_iso, step=10, out=None):
  """Sanity-check a flex array of isotropic ADPs (u_iso).

  Returns a list of i_seqs whose ADP is <= 0, or None if all are positive.
  Additionally scans the array in windows of `step` atoms and raises Sorry
  when a sufficiently large window has (near-)identical min and max ADPs.

  BUG FIX: the windowed check previously computed min_adp/max_adp from the
  whole u_iso array instead of the current window u_iso_i, which made the
  per-window identical-ADP test a no-op unless the entire array was flat.
  """
  if (out is None):
    out = sys.stdout
  min_adp = flex.min(u_iso)
  if (min_adp <= 0):
    # report every non-positive ADP
    bad_i_seqs = []
    for i_seq in range(len(u_iso)):
      if (u_iso[i_seq] <= 0):
        bad_i_seqs.append(i_seq)
    return bad_i_seqs
  i = 0
  while i < u_iso.size():
    if (i + step < u_iso.size()):
      u_iso_i = u_iso[i:i + step]
    else:
      u_iso_i = u_iso[i:]
    # only test windows big enough to be meaningful
    if (u_iso_i.size() >= step // 2):
      min_adp = flex.min(u_iso_i)  # was: flex.min(u_iso)
      max_adp = flex.max(u_iso_i)  # was: flex.max(u_iso)
      if (abs(min_adp - max_adp) < 0.1):
        raise Sorry("At least 10 bonded atoms have identical ADPs.")
    i += step
  return None
def run(self):
  """Test simple_fill: filled-in hole values must stay within the range
  of the known (unmasked) values.

  Modernized: Python 2 "print 'OK'" replaced with the Python 3 call form
  used elsewhere in this file; "mask.as_1d() == False" replaced with the
  idiomatic "~mask.as_1d()"; the double array is filled with 0.0 instead
  of the misleading True (every element is overwritten below anyway).
  """
  from dials.algorithms.image.fill_holes import simple_fill
  from scitbx.array_family import flex
  from math import sqrt
  mask = flex.bool(flex.grid(100, 100), True)
  data = flex.double(flex.grid(100, 100), 0)
  for j in range(100):
    for i in range(100):
      data[j, i] = 10 + j * 0.01 + i * 0.01
      # carve a circular hole of radius ~10.5 px centered at (50, 50)
      if sqrt((j - 50)**2 + (i - 50)**2) <= 10.5:
        mask[j, i] = False
        data[j, i] = 0
  result = simple_fill(data, mask)
  known = data.as_1d().select(mask.as_1d())
  filled = result.as_1d().select(~mask.as_1d())
  # interpolated values must not exceed the surrounding data range
  assert flex.max(filled) <= flex.max(known)
  assert flex.min(filled) >= flex.min(known)
  # Test passed
  print('OK')
def chebyshev_nodes(n, low=-1, high=1, include_limits=False):
  """Return n Chebyshev sampling nodes mapped onto [low, high].

  With include_limits=True the nodes are rescaled so that the first and
  last node coincide exactly with low and high.
  """
  # k = 1..n  ->  theta_k = (2k - 1) * pi / (2n)
  k = flex.double(range(n)) + 1
  theta = ((2.0 * k - 1.0) / n) * (math.pi / 2.0)
  nodes = -flex.cos(theta)
  if include_limits:
    # stretch so the extreme nodes reach -1 and +1 exactly
    half_span = (flex.max(nodes) - flex.min(nodes)) / 2.0
    nodes = nodes / half_span
  # affine map from [-1, 1] onto [low, high]
  nodes = 0.5 * (low + high) + 0.5 * (high - low) * nodes
  if include_limits:
    nodes[0] = low
    nodes[n - 1] = high
  return (nodes)
def main(filenames, map_file, npoints=192, max_resolution=6, reverse_phi=False):
  """Accumulate image pixels into a 3D reciprocal-space voxel map and
  write it out as a CCP4 map.

  Modernized: Python 2 print statements converted to the Python 3 form
  used elsewhere in this file (under Python 2, "1 / max_resolution" was
  also integer division yielding 0 for max_resolution > 1 — true division
  is intended); the bare "except:" is narrowed to Exception so
  KeyboardInterrupt/SystemExit still propagate.
  """
  rec_range = 1 / max_resolution
  image = ImageFactory(filenames[0])
  panel = image.get_detector()[0]
  beam = image.get_beam()
  s0 = beam.get_s0()
  pixel_size = panel.get_pixel_size()
  xlim, ylim = image.get_raw_data().all()
  # pixels within the requested resolution limit
  xy = recviewer.get_target_pixels(panel, s0, xlim, ylim, max_resolution)
  s1 = panel.get_lab_coord(xy * pixel_size[0])  # FIXME: assumed square pixel
  s1 = s1 / s1.norms() * (1 / beam.get_wavelength())  # / is not supported...
  S = s1 - s0
  grid = flex.double(flex.grid(npoints, npoints, npoints), 0)
  cnts = flex.int(flex.grid(npoints, npoints, npoints), 0)
  for filename in filenames:
    print("Processing image", filename)
    try:
      fill_voxels(ImageFactory(filename), grid, cnts, S, xy,
                  reverse_phi, rec_range)
    except Exception:
      print(" Failed to process. Skipped this.")
  recviewer.normalize_voxels(grid, cnts)
  uc = uctbx.unit_cell((npoints, npoints, npoints, 90, 90, 90))
  ccp4_map.write_ccp4_map(map_file, uc, sgtbx.space_group("P1"), (0, 0, 0),
                          grid.all(), grid,
                          flex.std_string(["cctbx.miller.fft_map"]))
  return
  # NOTE(review): everything below is unreachable (early return above);
  # kept verbatim as an experimental FFT branch.
  from scitbx import fftpack
  fft = fftpack.complex_to_complex_3d(grid.all())
  grid_complex = flex.complex_double(reals=flex.pow2(grid),
                                     imags=flex.double(grid.size(), 0))
  grid_transformed = flex.abs(fft.backward(grid_complex))
  print(flex.max(grid_transformed), flex.min(grid_transformed),
        grid_transformed.all())
  ccp4_map.write_ccp4_map(map_file, uc, sgtbx.space_group("P1"), (0, 0, 0),
                          grid.all(), grid_transformed,
                          flex.std_string(["cctbx.miller.fft_map"]))
def set_file(self, file_name, hierarchy=None):
  """Load a PDB file (or accept a pre-parsed hierarchy) and populate the
  per-chain B-factor summary ListCtrl in this window.

  file_name: path to the PDB file (always recorded, even if hierarchy
    is supplied).
  hierarchy: optional pre-parsed iotbx.pdb hierarchy; parsed from
    file_name when None.
  Raises Sorry for multi-MODEL files.
  """
  self.file_name = os.path.abspath(file_name)
  from scitbx.array_family import flex
  if (hierarchy is None):
    from iotbx import file_reader
    import iotbx.pdb
    pdb_in = file_reader.any_file(
      file_name,
      force_type="pdb",
      raise_sorry_if_errors=True,
      raise_sorry_if_not_expected_format=True)
    pdb_in.check_file_type("pdb")
    hierarchy = pdb_in.file_object.hierarchy
  if (len(hierarchy.models()) > 1):
    raise Sorry("Multi-MODEL PDB files not supported.")
  self._hierarchy = hierarchy
  self.SetTitle("B-factors by chain for %s" % to_unicode(self.file_name))
  self.file_txt.SetLabel(to_unicode(self.file_name))
  # two-column report: chain description | B-factor summary
  chain_list = wx.ListCtrl(self.panel, -1, style=wx.LC_REPORT,
                           size=(480, 160))
  chain_list.InsertColumn(0, "Chain info")
  chain_list.InsertColumn(1, "Mean B-iso (range)")
  chain_list.SetColumnWidth(0, 260)
  chain_list.SetColumnWidth(1, 200)
  for chain in hierarchy.models()[0].chains():
    n_res = len(chain.residue_groups())
    chain_atoms = chain.atoms()
    n_atoms = len(chain_atoms)
    # classify the chain by its first conformer
    main_conf = chain.conformers()[0]
    chain_type = "other"
    if (main_conf.is_protein()):
      chain_type = "protein"
    elif (main_conf.is_na()):
      chain_type = "nucleic acid"
    chain_info = "'%s' (%s, %d res., %d atoms)" % (
      chain.id, chain_type, n_res, n_atoms)
    b_iso = chain_atoms.extract_b()
    b_max = flex.max(b_iso)
    b_min = flex.min(b_iso)
    b_mean = flex.mean(b_iso)
    b_info = "%.2f (%.2f - %.2f)" % (b_mean, b_min, b_max)
    # sys.maxunicode as the index appends the row at the end
    item = chain_list.InsertStringItem(sys.maxunicode, chain_info)
    chain_list.SetStringItem(item, 1, b_info)
  self.panel_sizer.Add(chain_list, 1, wx.EXPAND | wx.ALL, 5)
  self.panel.Layout()
  self.panel_sizer.Fit(self.panel)
  self.Fit()
def exercise_reference_impl_long(n_dynamics_steps, out):
  """Run the fmri reference simulation for n_dynamics_steps and report the
  energy trajectory, including total-energy drift (max-min).

  Modernized: Python 2 "print >> out" statements and xrange replaced with
  the Python 3 forms already used elsewhere in this file.
  """
  sim = fmri.simulation()
  e_tots = flex.double([sim.e_tot])
  print("i_step, [e_pot, e_kin_ang, e_kin_lin, e_kin, e_tot]", file=out)

  def show(i_step):
    # one progress line per reporting interval
    print(i_step,
          [sim.e_pot, sim.e_kin_ang, sim.e_kin_lin, sim.e_kin, sim.e_tot],
          file=out)
    out.flush()

  n_show = max(1, n_dynamics_steps // 10)
  for i_step in range(n_dynamics_steps):
    sim.dynamics_step(delta_t=0.001)
    e_tots.append(sim.e_tot)
    if (i_step % n_show == 0):
      show(i_step)
  show(n_dynamics_steps)
  print(file=out)
  print("number of dynamics steps:", n_dynamics_steps, file=out)
  print("e_tot start:", e_tots[0], file=out)
  print(" final:", e_tots[-1], file=out)
  print(" min:", flex.min(e_tots), file=out)
  print(" max:", flex.max(e_tots), file=out)
  # drift of the total energy over the run
  print(" max-min:", flex.max(e_tots) - flex.min(e_tots), file=out)
  print(file=out)
  out.flush()
def exercise_mask_data_1(space_group_info, n_sites=100):
  """Exercise masks.atom_mask on random structures over a grid of
  resolutions and grid-step factors; the whole-unit-cell mask data must
  range from 0 to the space-group order (symmetry-summed mask).
  """
  from cctbx import maptbx
  from cctbx.masks import vdw_radii_from_xray_structure
  for d_min in [1, 1.5, 2.1]:
    for resolution_factor in [1. / 2, 1. / 3, 1. / 4, 1. / 5]:
      # random structure with roughly equal O/N/C content
      xrs = random_structure.xray_structure(
        space_group_info=space_group_info,
        elements=(("O", "N", "C") * (n_sites // 3 + 1))[:n_sites],
        volume_per_atom=30,
        min_distance=1)
      atom_radii = vdw_radii_from_xray_structure(xray_structure=xrs)
      asu_mask = masks.atom_mask(unit_cell=xrs.unit_cell(),
                                 group=xrs.space_group(),
                                 resolution=d_min,
                                 grid_step_factor=resolution_factor,
                                 solvent_radius=1.0,
                                 shrink_truncation_radius=1.0)
      asu_mask.compute(xrs.sites_frac(), atom_radii)
      mask_data = asu_mask.mask_data_whole_uc()
      assert flex.min(mask_data) == 0.0
      # It's not just 0 and 1 ...
      assert flex.max(mask_data) == xrs.space_group().order_z()
      # In fact, it is a mixture ...
      if 0:  # XXX this will rightfully crash
        mask_data_ = mask_data / xrs.space_group().order_z()
        s0 = mask_data_ < 0.5
        s1 = mask_data_ > 0.5
        if (mask_data_.size() != s0.count(True) + s1.count(True)):
          for d in mask_data_:
            if (d != 0 and d != 1):
              print(d, xrs.space_group().order_z())
        assert mask_data_.size(
        ) == s0.count(True) + s1.count(True), [
          mask_data_.size() - (s0.count(True) + s1.count(True))
        ]
      if (0):  # XXX This would crash with the message: "... The grid is not ..."
        cr_gr = maptbx.crystal_gridding(
          unit_cell=xrs.unit_cell(),
          d_min=d_min,
          resolution_factor=resolution_factor)
        asu_mask = masks.atom_mask(unit_cell=xrs.unit_cell(),
                                   space_group=xrs.space_group(),
                                   gridding_n_real=cr_gr.n_real(),
                                   solvent_radius=1.0,
                                   shrink_truncation_radius=1.0)
        asu_mask.compute(xrs.sites_frac(), atom_radii)
def write_image(file_name='image.png',
                detector_size=None,
                image_data=None,
                max_value=2.0**8 - 1):
  """Render image_data as an inverted greyscale image of detector_size.

  When the data minimum is positive, values are first expressed as the
  absolute relative deviation from that minimum; the result is then
  scaled so its maximum equals max_value.
  """
  assert ((detector_size[0] * detector_size[1]) == len(image_data))
  pixels = image_data.deep_copy()
  lowest = flex.min(pixels)
  if lowest > 0.0:
    # absolute fractional deviation from the minimum value
    pixels = flex.fabs(pixels / lowest - 1.0)
  # scale the brightest pixel up to max_value
  pixels = max_value / flex.max(pixels) * pixels
  img = Image.new('L', detector_size)
  img.putdata(pixels)
  img = ImageOps.invert(img)
  img.save(file_name)
def find_candidate_basis_vectors(self):
  """Search the unindexed reflections for candidate basis vectors.

  Applies the d_min_start resolution cutoff when set; otherwise derives
  self.d_min from the reflections actually used in the search.
  Returns self.candidate_basis_vectors.
  """
  self.d_min = self.params.refinement_protocol.d_min_start
  # reflections not yet assigned to any experiment
  unindexed = self.reflections["id"] == -1
  if self.d_min is not None:
    # keep only reflections within the resolution cutoff
    unindexed &= 1 / self.reflections["rlp"].norms() > self.d_min
  subset = self.reflections.select(unindexed)
  (self.candidate_basis_vectors,
   used_in_indexing) = self._basis_vector_search_strategy.find_basis_vectors(
     subset["rlp"])
  # map the strategy's selection back to indices into self.reflections
  self._used_in_indexing = unindexed.iselection().select(used_in_indexing)
  if self.d_min is None:
    rlp_used = self.reflections["rlp"].select(self._used_in_indexing)
    self.d_min = flex.min(1 / rlp_used.norms())
  self.debug_show_candidate_basis_vectors()
  return self.candidate_basis_vectors
def fit_data(p):
  """Fit a Fourier-Legendre series to angular autocorrelation data.

  Odd-order coefficients are zeroed (by symmetry); optionally refines the
  even coefficients by L-BFGS and/or computes standardization scales.

  Modernized: xrange -> range (xrange is a Python-2-only builtin; other
  blocks in this file already use Python 3 constructs).

  Returns (fit_c, fit_v, scales) where scales = (f_min, f_max).
  """
  fls = fourier_legendre_series()
  fls.read_polynomials(p.fls_data)
  # cos^2(theta/2-ish geometric factor from q and wavelength
  cos_sq = p.q * p.wavelength / (4.0 * math.pi)
  cos_sq = cos_sq * cos_sq
  sin_sq = 1.0 - cos_sq
  fit_x = cos_sq + sin_sq * flex.cos(p.x)
  fit_ac = p.ac.deep_copy()
  # sort the data points by abscissa before fitting
  fit_x, fit_ac = zip(*sorted(zip(fit_x, fit_ac)))
  fit_x = flex.double(fit_x)
  fit_ac = flex.double(fit_ac)
  fit_c = fls.compute_coefficients(p.fit_order, fit_ac, fit_x)
  fit_c = set_odd_coefficients_to_zero(fit_c)
  fit_v = fls.compute_coefficient_variances(p.fit_order, p.v, fit_x)
  fit_v = set_odd_coefficients_to_zero(fit_v)
  if (p.minimize):
    # collect the (made-positive) even coefficients as starting values
    nz_c = flex.double()
    for k in range(0, len(fit_c), 2):
      if (fit_c[k] < 0.0):
        fit_c[k] = -fit_c[k]
      nz_c.append(fit_c[k])
    m = lbfgs_optimizer(fit_ac, flex.sqrt(p.v), nz_c, fit_x, fls)
    nz_v = m.estimate_asymptotic_variance(m.x)
    assert (nz_c.size() == nz_v.size())
    # scatter refined values/variances back into the even slots
    count = 0
    for k in range(0, len(fit_c), 2):
      fit_c[k] = m.x[count]
      fit_v[k] = nz_v[count]
      count += 1
  f_min = 1.0
  f_max = 1.0
  if (p.standardize):
    # standardize fitted curve to have a min of 1.0 and max of 2.0
    old_f = fls.compute_function(fit_c, fit_x)
    # assume f is positive
    f_min = flex.min(old_f)
    f_max = flex.max(flex.fabs(old_f / f_min - 1.0))
  scales = (f_min, f_max)
  return fit_c, fit_v, scales
def is_within(dist, coords_1, coords_2):
  """Checks if any of coords_1 is within dist of coords_2"""
  cutoff_sq = dist**2
  # iterate over the smaller set; vectorize the distance computation
  # over the larger one
  if len(coords_2) < len(coords_1):
    outer, inner = coords_2, coords_1
  else:
    outer, inner = coords_1, coords_2
  for center in outer:
    deltas = inner - center
    if flex.min(deltas.dot()) < cutoff_sq:
      return True
  return False
def __init__(self, rawdata, projection_vector, spotfinder_spot, verbose=False):
  """Build a 1/10-pixel-resolution histogram of spot intensity projected
  onto a line through the spot centre.

  projection vector is either the radial or azimuthal unit vector
  at a specific Bragg spot position.

  Modernized: Python 2 print statements and xrange replaced with the
  Python 3 forms used elsewhere in this file.
  """
  model_center = col(
    (spotfinder_spot.ctr_mass_x(), spotfinder_spot.ctr_mass_y()))
  # per-pixel response profile along the projection direction
  px_x, px_y = project_2d_response_onto_line(projection_vector)
  point_projections = flex.double()
  pixel_values = flex.double()
  for point in spotfinder_spot.bodypixels:
    # signed distance of this pixel from the spot centre along the line
    point_projection = (col(
      (point.x, point.y)) - model_center).dot(projection_vector)
    point_projections.append(point_projection)
    pxval = rawdata[(point.x, point.y)]
    if verbose:
      print("point_projection", point_projection, end=' ')
      print("signal", pxval)
    pixel_values.append(pxval)
  Lmin = flex.min(point_projections)
  Lmax = flex.max(point_projections)
  #print "Range %6.2f"%(Lmax-Lmin)
  # pad the histogram range by 2 px on each side, rounded to 0.1 px
  Rmin = round(Lmin - 2.0, 1)
  Rmax = round(Lmax + 2.0, 1)
  #print "Range %6.2f"%(Rmax-Rmin)

  def histogram_bin(j):
    return int(10. * (j - Rmin))  # bin units of 1/10 pixel

  histo_x = flex.double((int(10 * (Rmax - Rmin))))
  histo_y = flex.double(len(histo_x))
  for ihis in range(len(histo_x)):
    histo_x[ihis] = Rmin + 0.1 * ihis
  # spread each pixel's value over the response profile
  for ipp, point_projection in enumerate(point_projections):
    value = pixel_values[ipp]
    for isample in range(len(px_x)):
      histo_y[int(10 * (point_projection + px_x[isample] - Rmin))] += \
        value * px_y[isample]
  self.histo_x = histo_x
  self.histo_y = histo_y
def show_residual(self, plot=False):
  """Print (and optionally plot) residuals between the shoebox data and
  the fitted planar-background + scaled-profile model, then summarize the
  residual variance against the approximate Poisson expectation.
  """
  F = self.sb.data.focus()
  if plot:
    # Residual
    # print a formatted grid of per-pixel residuals
    for x in range(F[1]):
      for y in range(F[2]):
        # model = planar background (a0*x + a1*y + a2) + a3 * profile
        background = self.a[0] * x + self.a[1] * y + self.a[2]
        model = background + self.a[3] * self.roi[x, y]
        print("%4.0f" % (self.sb.data[0, x, y] - model), end=' ')
      print()
    print()
  # analyse the variance
  print(list(self.a))
  diff = flex.double()
  approx_poisson_sigma = flex.double()
  for x in range(F[1]):
    for y in range(F[2]):
      background = self.a[0] * x + self.a[1] * y + self.a[2]
      model = background + self.a[3] * self.roi[x, y]
      diffdata = self.sb.data[0, x, y] - model
      diff.append(diffdata)
      if model < 0.:
        model = 0.  # complete kludge avoid a math domain error
      # Poisson: sigma ~ sqrt(expected counts)
      approx_poisson_sigma.append(math.sqrt(model))
  MV = flex.mean_and_variance(diff)
  fmin, fmax = flex.min(diff), flex.max(diff)
  print("residual Min=%4.0f, Mean=%4.0f, Max=%4.0f" %
        (fmin, MV.mean(), fmax))
  print("residual stddev=%4.0f" %
        (MV.unweighted_sample_standard_deviation()))
  print("ML mean Poisson stddev=%4.0f" % (flex.mean(approx_poisson_sigma)))
  if plot:
    from matplotlib import pyplot as plt
    # NOTE(review): "normed" is the legacy (pre-matplotlib-3.x) keyword
    n, bins, patches = plt.hist(diff, 40,
                                normed=1, facecolor='g', alpha=0.75)
    plt.xlabel('Model vs. Observation Residual')
    plt.ylabel('Probability')
    plt.title('Histogram of Model Residual')
    plt.show()
def plot_rij_histogram(rij_matrix, key="cosym_rij_histogram"):
  """Plot a histogram of the rij values.

  Args:
      plot_name (str): The file name to save the plot to.
          If this is not defined then the plot is displayed in
          interactive mode.
  """
  # drop the zero entries (diagonal / unpopulated pairs)
  rij = rij_matrix.as_1d()
  rij = rij.select(rij != 0)
  hist = flex.histogram(
    rij,
    data_min=min(-1, flex.min(rij)),
    data_max=max(1, flex.max(rij)),
    n_slots=100,
  )
  trace = {
    "x": list(hist.slot_centers()),
    "y": list(hist.slots()),
    "type": "bar",
    "name": "Rij histogram",
  }
  layout = {
    "title": "Distribution of values in the Rij matrix",
    "xaxis": {"title": "r<sub>ij</sub>"},
    "yaxis": {"title": "Frequency"},
    "bargap": 0,
  }
  help_text = """\
A histogram of the values of the Rij matrix of pairwise correlation
coefficients. A unimodal distribution of values may suggest that no
indexing ambiguity is evident, whereas a bimodal distribution can be
indicative of the presence of an indexing ambiguity.
"""
  return {
    key: {
      "data": [trace],
      "layout": layout,
      "help": help_text,
    }
  }
def output_image(flex_img, filename, invert=False, scale=False):
  """Write a flex image as a PNG (mode "I"), after shifting its minimum
  to zero; optionally scale to 16-bit range and/or invert for display.

  Cleanup: removed the no-op statement "flex_img = flex_img" and stopped
  rebinding the boolean parameter "scale" to a float scale factor
  (confusing shadowing); behavior is unchanged.
  """
  import Image
  flex_img = flex_img.deep_copy()
  # shift so the minimum pixel value is zero
  flex_img -= flex.min(flex_img)
  if scale:
    img_max_value = 2**16
    scale_factor = img_max_value / flex.max(flex_img)
    flex_img = flex_img.as_double() * scale_factor
  if invert:
    img_max_value = 2**16
    flex_img = img_max_value - flex_img  # invert image for display
  dim = flex_img.all()
  #easy_pickle.dump("%s/avg_img.pickle" %output_dirname, flex_img)
  byte_str = flex_img.slice_to_byte_str(0, flex_img.size())
  im = Image.fromstring(mode="I", size=(dim[1], dim[0]), data=byte_str)
  im = im.crop((0, 185, 391, 370))
  #im.save("avg.tiff", "TIFF") # XXX This does not work (phenix.python -Qnew option)
  im.save(filename, "PNG")
def __init__(self, xray_structure, k_anisotropic, k_masks, ss):
  """Fit k*exp(-b*ss) to the total (anisotropic) scale and, when the fit
  is good enough, transfer the exponential B component onto the model
  ADPs (shifting xray_structure in place) while compensating the
  anisotropic scale and mask scales accordingly.
  """
  self.xray_structure = xray_structure
  self.k_anisotropic = k_anisotropic
  self.k_masks = k_masks
  self.ss = ss
  #
  # analytical single-Gaussian starting values for (k, b)
  k_total = self.k_anisotropic
  r = scitbx.math.gaussian_fit_1d_analytical(x=flex.sqrt(self.ss),
                                             y=k_total)
  k, b = r.a, r.b
  #
  # refined exponential fit; r is the residual of the fit
  k, b, r = mmtbx.bulk_solvent.fit_k_exp_b_to_k_total(k_total, self.ss, k, b)
  k_exp_overall, b_exp_overall = None, None
  # accept the exponential model only when the fit is reasonably good
  if (r < 0.7):
    k_exp_overall, b_exp_overall = k, b
  if (self.xray_structure is None):
    return None
  b_adj = 0
  if ([k_exp_overall, b_exp_overall].count(None) == 0 and k != 0):
    # B-equivalents before the shift (for the consistency check below)
    bs1 = self.xray_structure.extract_u_iso_or_u_equiv(
    ) * adptbx.u_as_b(1.)

    def split(b_trace, xray_structure):
      # split b_trace into an applied shift (b_adj) and a residual
      # (b_res) that would make any ADP eigenvalue negative
      b_min = xray_structure.min_u_cart_eigenvalue() * adptbx.u_as_b(1.)
      b_res = min(0, b_min + b_trace + 1.e-6)
      b_adj = b_trace - b_res
      xray_structure.shift_us(b_shift=b_adj)
      return b_adj, b_res

    b_adj, b_res = split(b_trace=b_exp_overall,
                         xray_structure=self.xray_structure)
    k_new = k_exp_overall * flex.exp(-self.ss * b_adj)
    bs2 = self.xray_structure.extract_u_iso_or_u_equiv(
    ) * adptbx.u_as_b(1.)
    # the shift must have changed every atom's B by exactly b_adj
    diff = bs2 - bs1
    assert approx_equal(flex.min(diff), flex.max(diff))
    assert approx_equal(flex.max(diff), b_adj)
    # compensate the scales so the overall model is unchanged
    self.k_anisotropic = self.k_anisotropic / k_new
    self.k_masks = [
      m * flex.exp(-self.ss * b_adj) for m in self.k_masks
    ]
def show(self):
  """Print per-resolution-bin bulk-solvent and scaling statistics
  (k_mask variants, k_iso, k_ani, mean Fobs and R factor) to self.log.
  """
  b = self.bss_result
  print(" Statistics in resolution bins:", file=self.log)
  #assert k_mask.size() == len(self.bin_selections)
  fmt = " %7.5f %6.2f -%6.2f %5.1f %5d %-6s %-6s %-6s %6.3f %6.3f %8.2f %6.4f"
  f_model = self.core.f_model.data()
  print(
    " s^2 Resolution Compl Nrefl k_mask k_iso k_ani <Fobs> R",
    file=self.log)
  print(" (A) (%) orig smooth average", file=self.log)
  # defaults shown when the bulk-solvent result lacks these arrays
  k_mask_bin_orig_ = str(None)
  k_mask_bin_smooth_ = str(None)
  k_mask_bin_approx_ = str(None)
  for i_sel, cas in enumerate(self.cores_and_selections):
    selection, core, selection_use, sel_work = cas
    sel = sel_work
    # representative s^2 value for this bin
    ss_ = self.ss_bin_values[i_sel][2]
    if (b is not None and self.bss_result.k_mask_bin_orig is not None):
      k_mask_bin_orig_ = "%6.4f" % self.bss_result.k_mask_bin_orig[i_sel]
    if (b is not None and self.bss_result.k_mask_bin_smooth is not None):
      k_mask_bin_smooth_ = "%6.4f" % self.bss_result.k_mask_bin_smooth[
        i_sel]
    k_mask_bin_averaged_ = "%6.4f" % flex.mean(
      self.core.k_mask().select(sel))
    d_ = self.d_spacings.data().select(sel)
    d_min_ = flex.min(d_)
    d_max_ = flex.max(d_)
    n_ref_ = d_.size()
    f_obs_ = self.f_obs.select(sel)
    f_obs_mean_ = flex.mean(f_obs_.data())
    k_isotropic_ = flex.mean(self.core.k_isotropic.select(sel))
    k_anisotropic_ = flex.mean(self.core.k_anisotropic.select(sel))
    cmpl_ = f_obs_.completeness(d_max=d_max_) * 100.
    r_ = bulk_solvent.r_factor(f_obs_.data(), f_model.select(sel), 1)
    print(fmt % (ss_, d_max_, d_min_, cmpl_, n_ref_, k_mask_bin_orig_,
                 k_mask_bin_smooth_, k_mask_bin_averaged_, k_isotropic_,
                 k_anisotropic_, f_obs_mean_, r_),
          file=self.log)
def do_sigma_scaling(self):
  """Approximate a gain correction by dividing each pixel by its
  (normalized) dark standard deviation.

  Divide each pixel value by its dark standard deviation. Since we are
  led to believe that the standard deviation of a pixel is proportional
  to the gain of said pixel, this approximates a gain correction.

  Modernized: the Python 2 print statement in the debug branch is
  replaced with the Python 3 call form used elsewhere in this file.
  """
  assert self.dark_img is not None
  # not appropriate to do sigma scaling and gain correction at the same time!
  assert self.gain_map is None
  flex_cspad_img = self.cspad_img.as_double()
  # operate only on pixels inside the dark mask
  flex_cspad_img_sel = flex_cspad_img.as_1d().select(self.dark_mask.as_1d())
  flex_dark_stddev = self.dark_stddev.select(
    self.dark_mask.as_1d()).as_double()
  assert flex_dark_stddev.count(0) == 0
  # normalize so the correction preserves the mean level
  flex_dark_stddev /= flex.mean(flex_dark_stddev)
  flex_cspad_img_sel /= flex_dark_stddev
  flex_cspad_img.as_1d().set_selected(
    self.dark_mask.as_1d().iselection(), flex_cspad_img_sel)
  self.cspad_img = flex_cspad_img
  if 0:  # for debugging
    from matplotlib import pyplot
    hist_min, hist_max = flex.min(flex_cspad_img_sel.as_double()), \
      flex.max(flex_cspad_img_sel.as_double())
    print(hist_min, hist_max)
    n_slots = 100
    n, bins, patches = pyplot.hist(
      flex_cspad_img_sel.as_1d().as_numpy_array(),
      bins=n_slots, range=(hist_min, hist_max))
    pyplot.show()
def _batch_bins_and_data(batches, values, function_to_apply):
  """Apply function to the data from each batch.

  Return the list of the batch bins and the value for each bin.
  """
  batch_bins = []
  data = []
  run_start = 0
  current_batch = flex.min(batches)
  n_ref = batches.size()
  # walk one index past the end so the final run is flushed
  for idx in range(n_ref + 1):
    at_end = idx == n_ref
    if at_end or batches[idx] != current_batch:
      # the run [run_start, idx) must be a single uniform batch
      assert batches[run_start:idx].all_eq(current_batch)
      data.append(function_to_apply(values[run_start:idx]))
      batch_bins.append(current_batch)
      run_start = idx
      if not at_end:
        current_batch = batches[run_start]
  return batch_bins, data