def setup_test_sorting():
    # Borrowed from tst_reflection_table function tst_find_overlapping
    N = 110
    r = flex.reflection_table.empty_standard(N)
    r["panel"] = flex.size_t([1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0] * 10)
    r["id"] = flex.int([1, 2, 1, 1, 2, 0, 1, 1, 1, 0, 1] * 10)
    exp_ids = flex.size_t([0, 1])
    for i in range(N):
        r["miller_index"][i] = (
            int(i // 10) - 5,
            i % 3,
            i % 7,
        )  # A nice bunch of miller indices

    # Filter out reflections to be used by refinement. Sorting of the filtered
    # reflections is required to allow the C++ extension modules to give a
    # performance benefit. Sorting is performed within the _filter_reflections
    # step, by id and then by panel.
    r_sorted = copy.deepcopy(r)
    r_sorted.sort("id")
    r_sorted.subsort("id", "panel")

    # Test that the unfiltered/unsorted table becomes filtered/sorted for id
    assert (r_sorted["id"] == r["id"].select(flex.sort_permutation(r["id"]))).count(
        False
    ) == 0
    # as above for panel within each id
    for ii in [0, 1, 2]:
        r_id = r.select(r["id"] == ii)
        r_sorted_id = r_sorted.select(r_sorted["id"] == ii)
        assert (
            r_sorted_id["panel"]
            == r_id["panel"].select(flex.sort_permutation(r_id["panel"]))
        ).count(False) == 0
    return (r, r_sorted, exp_ids)
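# A minimal plain-Python analogue of the sort/subsort check above (a sketch,
# not the DIALS API): assuming sort and subsort are stable, sort("id")
# followed by subsort("id", "panel") is the same as one stable sort on the
# (id, panel) key pair.
ids = [1, 2, 1, 1, 2, 0, 1, 1, 1, 0, 1]
panels = [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]
perm = sorted(range(len(ids)), key=lambda i: (ids[i], panels[i]))
sorted_ids = [ids[i] for i in perm]
sorted_panels = [panels[i] for i in perm]
assert sorted_ids == sorted(ids)
for dataset in set(ids):
    sub = [p for j, p in zip(sorted_ids, sorted_panels) if j == dataset]
    assert sub == sorted(sub)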
def matcher(reference, moving, params): from annlib_ext import AnnAdaptor as ann_adaptor from dials.array_family import flex rxyz = reference['xyzobs.px.value'].parts() mxyz = moving['xyzobs.px.value'].parts() rxy = flex.vec2_double(rxyz[0], rxyz[1]) mxy = flex.vec2_double(mxyz[0], mxyz[1]) ann = ann_adaptor(rxy.as_double().as_1d(), 2) ann.query(mxy.as_double().as_1d()) distances = flex.sqrt(ann.distances) matches = (distances < params.far) & (distances >= params.close) xyr = flex.vec2_double() xym = flex.vec2_double() for j in range(matches.size()): if not matches[j]: continue xym.append(mxy[j]) xyr.append(rxy[ann.nn[j]]) # filter outliers - use IQR etc. dxy = xym - xyr dx, dy = dxy.parts() iqx = IQR(dx.select(flex.sort_permutation(dx))) iqy = IQR(dy.select(flex.sort_permutation(dy))) keep_x = (dx > (iqx[0] - iqx[3])) & (dx < (iqx[2] + iqx[3])) keep_y = (dy > (iqy[0] - iqy[3])) & (dy < (iqy[2] + iqy[3])) keep = keep_x & keep_y xyr = xyr.select(keep) xym = xym.select(keep) # compute Rt R, t, d, n = Rt(xyr, xym) # verify matches in original image coordinate system from scitbx import matrix import math _R = matrix.sqr(R) rmsd = 0.0 for j, _xym in enumerate(xym): _xymm = _R * _xym + matrix.col(t) rmsd += (matrix.col(xyr[j]) - _xymm).length()**2 assert abs(math.sqrt(rmsd / xym.size()) - d) < 1e-6 return R, t, d, n
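# The IQR helper used by matcher() above (and by pair_up below) is not shown
# in this snippet. A plausible minimal reconstruction (an assumption, not the
# actual definition): given values already sorted ascending, return
# (Q1, median, Q3, 1.5 * interquartile range), so that the subsequent
# keep = (x > Q1 - w) & (x < Q3 + w) test is the usual Tukey fence.
def IQR_sketch(sorted_values):
    n = len(sorted_values)
    q1 = sorted_values[n // 4]
    q2 = sorted_values[n // 2]
    q3 = sorted_values[(3 * n) // 4]
    return q1, q2, q3, 1.5 * (q3 - q1)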
def pair_up(reference, moving, params, R0, t0):
    from annlib_ext import AnnAdaptor as ann_adaptor
    from dials.array_family import flex

    rxyz = reference['xyzobs.px.value'].parts()
    mxyz = moving['xyzobs.px.value'].parts()

    # apply R0, t0 before performing matching - the moving spots should then
    # already be in almost the right position
    rxy = flex.vec2_double(rxyz[0], rxyz[1])
    _mxy = flex.vec2_double(mxyz[0], mxyz[1])
    mxy = flex.vec2_double()
    for __mxy in _mxy:
        mxy.append((R0 * __mxy + t0).elems)

    ann = ann_adaptor(rxy.as_double().as_1d(), 2)
    ann.query(mxy.as_double().as_1d())
    distances = flex.sqrt(ann.distances)
    matches = (distances < params.far)

    rsel = flex.size_t()
    msel = flex.size_t()
    xyr = flex.vec2_double()
    xym = flex.vec2_double()

    for j in range(matches.size()):
        if not matches[j]:
            continue
        msel.append(j)
        rsel.append(ann.nn[j])
        xym.append(mxy[j])
        xyr.append(rxy[ann.nn[j]])

    # filter outliers - use IQR etc.
    dxy = xym - xyr
    dx, dy = dxy.parts()
    iqx = IQR(dx.select(flex.sort_permutation(dx)))
    iqy = IQR(dy.select(flex.sort_permutation(dy)))

    keep_x = (dx > (iqx[0] - iqx[3])) & (dx < (iqx[2] + iqx[3]))
    keep_y = (dy > (iqy[0] - iqy[3])) & (dy < (iqy[2] + iqy[3]))
    keep = keep_x & keep_y

    return rsel.select(keep), msel.select(keep)
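# The Rt helper called in matcher() above (whose R, t output is what pair_up
# applies as R0, t0) is likewise not shown. A sketch of what it appears to
# compute, via the standard 2-D Kabsch/Procrustes method (an assumption, not
# the actual implementation): the least-squares rotation R and translation t
# mapping `moving` onto `reference`, plus the r.m.s. residual and point count.
import numpy as np

def rigid_2d_fit_sketch(reference, moving):
    P = np.asarray(moving, dtype=float)
    Q = np.asarray(reference, dtype=float)
    Pc, Qc = P - P.mean(axis=0), Q - Q.mean(axis=0)
    U, _, Vt = np.linalg.svd(Pc.T.dot(Qc))
    D = np.diag([1.0, np.linalg.det(U.dot(Vt))])  # guard against reflections
    R = Vt.T.dot(D).dot(U.T)
    t = Q.mean(axis=0) - R.dot(P.mean(axis=0))
    resid = Q - (P.dot(R.T) + t)
    rmsd = np.sqrt((resid ** 2).sum() / len(P))
    return R, t, rmsd, len(P)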
def setup_mtz_models(mtzfile):
    table, uc, sg = data_from_mtz(mtzfile)
    asu_index_s, d_s = map_indices_to_asu(table["miller_index"], sg, uc)
    anom_index_s, _ = map_indices_to_asu(table["miller_index"], sg, uc, anom=True)
    intensity_s = table["intensity"]
    sigma_s = table["sigma"]
    isel_s = flex.sort_permutation(d_s, reverse=True)
    sorted_asu_s = asu_index_s.select(isel_s)
    sorted_anom_s = anom_index_s.select(isel_s)
    scaled_groups = list(OrderedSet(sorted_asu_s))
    Data = collections.namedtuple(
        "Data", ["intensity", "sigma", "dose", "asu_index", "anom_index"])
    data = Data(
        intensity=intensity_s.select(isel_s),
        sigma=sigma_s.select(isel_s),
        dose=table["batch"].select(isel_s),
        asu_index=sorted_asu_s,
        anom_index=sorted_anom_s,
    )
    return [MTZModel(data)], scaled_groups, uc, sg
def _perform_quasi_random_selection(Ih_table, n_datasets, min_per_class, min_total, max_total): class_matrix = sparse.matrix(n_datasets, Ih_table.size) Ih_table.Ih_table["class_index"] = Ih_table.Ih_table["dataset_id"] class_matrix = _build_class_matrix(Ih_table.Ih_table, class_matrix) segments_in_groups = class_matrix * Ih_table.h_index_matrix total = flex.double(segments_in_groups.n_cols, 0) for i, col in enumerate(segments_in_groups.cols()): total[i] = col.non_zeroes perm = flex.sort_permutation(total, reverse=True) sorted_class_matrix = segments_in_groups.select_columns(perm) # matrix of segment index vs asu groups # now want to fill up until good coverage across board total_in_classes, cols_not_used = _loop_over_class_matrix( sorted_class_matrix, min_per_class, min_total, max_total) cols_used = flex.bool(sorted_class_matrix.n_cols, True) cols_used.set_selected(cols_not_used, False) actual_cols_used = perm.select(cols_used) # now need to get reflection selection reduced_Ih = Ih_table.select_on_groups_isel(actual_cols_used) indices_this_res = reduced_Ih.Ih_table["loc_indices"] dataset_ids_this_res = reduced_Ih.Ih_table["dataset_id"] n_groups_used = len(actual_cols_used) return indices_this_res, dataset_ids_this_res, n_groups_used, total_in_classes
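# _build_class_matrix is referenced above (and again further below) but not
# shown. A minimal sketch of what it appears to do (an assumption): set entry
# (class_index, reflection) to 1, so that class_matrix * h_index_matrix then
# counts, per symmetry-equivalent group, which classes (here, datasets)
# contribute reflections to it.
def _build_class_matrix_sketch(reflection_table, class_matrix):
    for refl_idx, class_idx in enumerate(reflection_table["class_index"]):
        class_matrix[int(class_idx), refl_idx] = 1.0
    return class_matrix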
def generate_exp_list(params, all_exp, all_ref): if params.n_subset is not None: subset_all_exp = [] subset_all_ref = [] n_picked = 0 if params.n_subset_method == "random": while n_picked < params.n_subset: idx = random.randint(0, len(all_exp) - 1) subset_all_exp.append(all_exp.pop(idx)) subset_all_ref.append(all_ref.pop(idx)) n_picked += 1 elif params.n_subset_method == "n_refl": from dials.array_family import flex import cPickle as pickle len_all_ref = flex.size_t( [len(pickle.load(open(A, "rb"))) for A in all_ref]) sort_order = flex.sort_permutation(len_all_ref, reverse=True) for idx in sort_order[:params.n_subset]: subset_all_exp.append(all_exp[idx]) subset_all_ref.append(all_ref[idx]) print "Selecting a subset of %d images with highest n_refl out of %d total." % ( params.n_subset, len(len_all_ref)) all_exp = subset_all_exp all_ref = subset_all_ref return all_exp, all_ref
def setup_xds_models(xdsasciifile): table, uc, sg, filetype = read_xds_ascii(xdsasciifile) asu_index_s, d_s = map_indices_to_asu(table["miller_index"], sg, uc) anom_index_s, _ = map_indices_to_asu(table["miller_index"], sg, uc, anom=True) intensity_s = table["intensity"] sigma_s = table["sigma"] isel_s = flex.sort_permutation(d_s, reverse=True) sorted_asu_s = asu_index_s.select(isel_s) sorted_anom_s = anom_index_s.select(isel_s) scaled_groups = list(OrderedSet(sorted_asu_s)) Data = collections.namedtuple( "Data", ["intensity", "sigma", "dose", "asu_index", "anom_index"]) data = Data( intensity=intensity_s.select(isel_s), sigma=sigma_s.select(isel_s), dose=table["z"].select(isel_s), asu_index=sorted_asu_s, anom_index=sorted_anom_s, ) label = None if filetype == "integrated": label = "XDS\nintegrated" elif filetype == "scaled": label = "XDS\nscaled" return [XDSModel(data, label=label)], scaled_groups, uc, sg
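# OrderedSet above only deduplicates while keeping first-seen order; on
# Python 3.7+ a plain dict gives the same result (a sketch):
def ordered_unique(items):
    return list(dict.fromkeys(items))

assert ordered_unique([(1, 0, 0), (0, 0, 2), (1, 0, 0)]) == [(1, 0, 0), (0, 0, 2)]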
def plot_ordered_d_star_sq(reflections, imageset): if pyplot is None: raise Sorry("matplotlib must be installed to generate a plot.") d_star_sq = flex.pow2(reflections['rlp'].norms()) perm = flex.sort_permutation(d_star_sq) pyplot.scatter(list(range(len(perm))), list(d_star_sq.select(perm)), marker='+') pyplot.show()
def refl_analysis(self, dials_model): """This function sets up some data structures (spots_*) allowing us to index into the spots and pixels of interest. These will be repeatedly used during parameter refinement to calculate target function and intermediate statistics. """ Z = self.refl_table indices = Z['miller_index'] expts = ExperimentListFactory.from_json_file(dials_model, check_format=False) self.dials_model = expts[0] CRYS = self.dials_model.crystal UC = CRYS.get_unit_cell() strong_resolutions = UC.d(indices) order = flex.sort_permutation(strong_resolutions, reverse=True) Z["spots_order"] = order self.spots_pixels = flex.size_t() spots_offset = flex.int(len(order), -1) spots_size = flex.int(len(order), -1) P = panels = Z['panel'] S = shoeboxes = Z['shoebox'] N_visited = 0 N_bad = 0 for oidx in range( len(order)): #loop through the shoeboxes in correct order sidx = order[oidx] # index into the Miller indices ipanel = P[sidx] slow_size = 254 fast_size = 254 panel_size = slow_size * fast_size bbox = S[sidx].bbox first_position = spots_offset[sidx] = self.spots_pixels.size() for islow in range(max(0, bbox[2] - 3), min(slow_size, bbox[3] + 3)): for ifast in range(max(0, bbox[0] - 3), min(fast_size, bbox[1] + 3)): value = self.trusted_mask[ipanel][islow * slow_size + ifast] N_visited += 1 if value: self.spots_pixels.append(ipanel * panel_size + islow * slow_size + ifast) else: N_bad += 1 spot_size = spots_size[sidx] = self.spots_pixels.size( ) - first_position Z["spots_offset"] = spots_offset Z["spots_size"] = spots_size print( N_visited, "pixels were visited in the %d shoeboxes (with borders)" % len(order)) print( N_bad, "of these were bad pixels, leaving %d in target" % (len(self.spots_pixels)))
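# A sketch of the pixel-index convention used in refl_analysis above: pixels
# are linearised panel by panel in row-major order. Note that slow_size and
# fast_size are both 254 here, so islow * slow_size + ifast coincides with
# the usual row-major index islow * fast_size + ifast.
def linear_pixel_index(ipanel, islow, ifast, slow_size=254, fast_size=254):
    panel_size = slow_size * fast_size
    return ipanel * panel_size + islow * fast_size + ifast

assert linear_pixel_index(0, 0, 5) == 5
assert linear_pixel_index(1, 2, 3) == 254 * 254 + 2 * 254 + 3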
def _probplot_data(self): """Generate the data for a normal probability plot of z-scores.""" for key, rtable in self.rtables.items(): order = flex.sort_permutation(rtable["intensity.z_score"]) osm = flex.double(rtable.size(), 0) probplot = scipy.stats.probplot(rtable["intensity.z_score"], fit=False) osm.set_selected(order, flex.double(probplot[0])) rtable["intensity.order_statistic_medians"] = osm self.rtables[key] = rtable
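# osm.set_selected(order, values) above scatters values computed on the
# sorted data back to the original row order; a plain-Python analogue (a
# sketch; scipy.stats.probplot(x, fit=False) returns order statistic medians
# and sample values in sorted order):
z = [0.3, -1.2, 0.7]
order = sorted(range(len(z)), key=z.__getitem__)  # like flex.sort_permutation
sorted_medians = [-1.0, 0.0, 1.0]  # hypothetical order statistic medians
osm = [0.0] * len(z)
for dest, val in zip(order, sorted_medians):
    osm[dest] = val
assert osm == [0.0, -1.0, 1.0]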
def run(args): user_phil = [] for arg in args: if os.path.isfile(arg): filename = arg else: try: user_phil.append(parse(arg)) except Exception as e: raise Sorry("Unrecognized argument: %s" % arg) params = phil_scope.fetch(sources=user_phil).extract() name = os.path.basename(filename) base, ext = os.path.splitext(name) filea = base + "_a" + ext fileb = base + "_b" + ext data = easy_pickle.load(filename) if params.use_selection is None: sel = flex.random_permutation(len(data)) else: sel = easy_pickle.load(params.use_selection) assert len(sel) == len( data), "Length of selection doesn't match length of input" data_a = data.select(sel[:len(data) // 2]) data_b = data.select(sel[len(data) // 2:]) data_a = data_a.select(flex.sort_permutation(data_a["id"])) data_b = data_b.select(flex.sort_permutation(data_b["id"])) assert len(data_a) + len(data_b) == len(data) easy_pickle.dump(filea, data_a) easy_pickle.dump(fileb, data_b) if params.output_selection is not None: easy_pickle.dump(params.output_selection, sel)
def find_rmsd_from_refl_tables(experiments, reflections, num_images): rmsd = flex.double() all_refl = [] for ii, expt in enumerate(experiments): refl_now = reflections.select(reflections['id'] == ii) dR = flex.double() for refl in refl_now: dR.append((col(refl['xyzcal.mm']) - col(refl['xyzobs.mm.value'])).length()) rmsd.append(1000.0 * math.sqrt(dR.dot(dR) / len(dR))) idx_list = list(flex.sort_permutation(rmsd, reverse=True)[0:num_images]) reqd_expt = ExperimentList() reqd_refl = flex.reflection_table() for ii, idx in enumerate(idx_list): reqd_expt.append(experiments[idx]) refl = reflections.select(reflections['id'] == idx) refl['id'].set_selected(flex.size_t(range(len(refl['id']))), ii) reqd_refl.extend(refl) return reqd_expt, reqd_refl
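# The per-reflection Python loop above (and in find_rmsd_from_files below)
# can be vectorised with flex operations; a sketch, assuming the standard
# DIALS column names:
import math
from dials.array_family import flex

def rmsd_microns_sketch(refl):
    delta = refl["xyzcal.mm"] - refl["xyzobs.mm.value"]  # flex.vec3_double
    return 1000.0 * math.sqrt(flex.mean(delta.dot(delta)))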
def select_highly_connected_reflections_in_bin(Ih_table_block, min_per_class=2, min_total=1000, max_total=10000): """Select highly connected reflections within a resolution shell.""" n = flex.double(Ih_table_block.size, 1.0) * Ih_table_block.h_index_matrix sel = n > 1 if sel.count(True) == 0: return None, None Ih_table_block.Ih_table["loc_indices"] = flex.size_t( range(0, Ih_table_block.size)) Ih_table_block = Ih_table_block.select_on_groups(sel) from scitbx import sparse class_matrix = sparse.matrix(12, Ih_table_block.size) class_matrix = _build_class_matrix(Ih_table_block.Ih_table, class_matrix) segments_in_groups = class_matrix * Ih_table_block.h_index_matrix total = flex.int(segments_in_groups.n_cols, 0) for i, col in enumerate(segments_in_groups.cols()): total[i] = col.non_zeroes perm = flex.sort_permutation(total, reverse=True) sorted_class_matrix = segments_in_groups.select_columns(perm) # matrix of segment index vs asu groups # now want to fill up until good coverage across board total_in_classes, cols_not_used = _loop_over_class_matrix( sorted_class_matrix, min_per_class, min_total, max_total) cols_used = flex.bool(sorted_class_matrix.n_cols, True) cols_used.set_selected(cols_not_used, False) actual_cols_used = perm.select(cols_used) # now need to get reflection selection reduced_Ih = Ih_table_block.select_on_groups_isel(actual_cols_used) indices = reduced_Ih.Ih_table["loc_indices"] return indices, total_in_classes
def run(): from cctbx import sgtbx awesomeness_index = [] for i in range(230): sg = sgtbx.space_group_info(number=i + 1).group() awesomeness_index.append(len(sg) / (i + 1)) from dials.array_family import flex perm = flex.sort_permutation(flex.double(awesomeness_index), reverse=True) for rank, i in enumerate(perm): sgi = sgtbx.space_group_info(number=i + 1) print("%3i %.2f" % (rank + 1, awesomeness_index[i]), sgi) from matplotlib import pyplot pyplot.scatter(range(1, 231), awesomeness_index) pyplot.xlabel("Space group number") pyplot.ylabel("Space group awesomeness index") pyplot.show()
def find_rmsd_from_files(filenames, root, num_images, rank=0): rmsd = flex.double() all_expt = [] all_refl = [] for filename in filenames: fjson = os.path.join(root, filename) experiments = ExperimentListFactory.from_json_file(fjson) fpickle = os.path.join( root, filename.split('refined_experiments')[0] + 'indexed.pickle') reflections = load(fpickle) ref_predictor = ExperimentsPredictorFactory.from_experiments( experiments, force_stills=experiments.all_stills()) reflections = ref_predictor(reflections) for ii, expt in enumerate(experiments): cbf_now = experiments[ii].imageset.get_image_identifier(0).split( '/')[-1] refl_now = reflections.select(reflections['id'] == ii) dR = flex.double() for refl in refl_now: dR.append((col(refl['xyzcal.mm']) - col(refl['xyzobs.mm.value'])).length()) rmsd.append(1000.0 * math.sqrt(dR.dot(dR) / len(dR))) all_expt.append(expt) all_refl.append(refl_now) # Now sort it reqd_expt = ExperimentList() reqd_refl = flex.reflection_table() idx_list = list(flex.sort_permutation(rmsd, reverse=True)[0:num_images]) print(idx_list) for ii, idx in enumerate(idx_list): reqd_expt.append(all_expt[idx]) refl = all_refl[idx] refl['id'].set_selected(flex.size_t(range(len(refl['id']))), ii) reqd_refl.extend(refl) return (reqd_expt, reqd_refl)
def two_color_grid_search(self):
  '''creates candidate reciprocal lattice points based on two beams and
  performs 2-D grid search based on maximizing the functional using
  N_UNIQUE_V candidate vectors (N_UNIQUE_V is usually 30 from Guildea paper)'''
  assert len(self.imagesets) == 1
  detector = self.imagesets[0].get_detector()

  mm_spot_pos = self.map_spots_pixel_to_mm_rad(self.reflections, detector, scan=None)

  self.map_centroids_to_reciprocal_space(mm_spot_pos, detector, self.beams[0],
                                         goniometer=None)
  self.reciprocal_lattice_points1 = mm_spot_pos['rlp'].select(
    (self.reflections['id'] == -1))
  rlps1 = mm_spot_pos['rlp'].select((self.reflections['id'] == -1))

  self.map_centroids_to_reciprocal_space(mm_spot_pos, detector, self.beams[1],
                                         goniometer=None)
  self.reciprocal_lattice_points2 = mm_spot_pos['rlp'].select(
    (self.reflections['id'] == -1))
  # assert len(self.beams) == 3
  rlps2 = mm_spot_pos['rlp'].select((self.reflections['id'] == -1))

  self.reciprocal_lattice_points = rlps1.concatenate(rlps2)

  #self.map_centroids_to_reciprocal_space(mm_spot_pos,detector,self.beams[2],goniometer=None)
  #self.reciprocal_lattice_points = mm_spot_pos['rlp'].select(
  #  (self.reflections['id'] == -1)&(1/self.reflections['rlp'].norms() > d_min))

  print "Indexing from %i reflections" % len(self.reciprocal_lattice_points)

  def compute_functional(vector):
    '''computes functional for 2-D grid search'''
    two_pi_S_dot_v = 2 * math.pi * self.reciprocal_lattice_points.dot(vector)
    return flex.sum(flex.cos(two_pi_S_dot_v))

  from rstbx.array_family import flex
  from rstbx.dps_core import SimpleSamplerTool
  assert self.target_symmetry_primitive is not None
  assert self.target_symmetry_primitive.unit_cell() is not None
  SST = SimpleSamplerTool(
    self.params.real_space_grid_search.characteristic_grid)
  SST.construct_hemisphere_grid(SST.incr)
  cell_dimensions = self.target_symmetry_primitive.unit_cell().parameters()[:3]
  unique_cell_dimensions = set(cell_dimensions)

  print("Making search vectors")
  spiral_method = True
  if spiral_method:
    basis_vec_noise = True
    noise_scale = 2.
    #_N = 200000  # massively oversample the hemisphere so we can apply noise to our search
    _N = 100000
    print "Number of search vectors: %i" % (_N * len(unique_cell_dimensions))
    J = _N * 2
    _thetas = [np.arccos((2. * j - 1. - J) / J) for j in range(1, J + 1)]
    _phis = [np.sqrt(np.pi * J) * np.arcsin((2. * j - 1. - J) / J)
             for j in range(1, J + 1)]
    _x = np.sin(_thetas) * np.cos(_phis)
    _y = np.sin(_thetas) * np.sin(_phis)
    _z = np.cos(_thetas)
    nn = int(_N * 1.01)
    _u_vecs = np.array(zip(_x, _y, _z))[-nn:]

    rec_pts = np.array([self.reciprocal_lattice_points[i]
                        for i in range(len(self.reciprocal_lattice_points))])
    N_unique = len(unique_cell_dimensions)

    # much faster to use numpy for massively over-sampled hemisphere..
    func_vals = np.zeros(nn * N_unique)
    vecs = np.zeros((nn * N_unique, 3))
    for i, l in enumerate(unique_cell_dimensions):
      # create noise model on top of lattice lengths...
      if basis_vec_noise:
        vec_mag = np.random.normal(l, scale=noise_scale, size=_u_vecs.shape[0])
        vec_mag = vec_mag[:, None]
      else:
        vec_mag = l
      ul = _u_vecs * vec_mag
      func_slc = slice(i * nn, (i + 1) * nn)
      vecs[func_slc] = ul
      func_vals[func_slc] = np.sum(np.cos(2 * np.pi * np.dot(rec_pts, ul.T)),
                                   axis=0)

    order = np.argsort(func_vals)[::-1]  # sort function values, largest values first
    function_values = func_vals[order]
    vectors = vecs[order]

  else:  # fall back on original flex method
    vectors = flex.vec3_double()
    function_values = flex.double()
    print "Number of search vectors: %i" % (
      len(SST.angles) * len(unique_cell_dimensions))
    for i, direction in enumerate(SST.angles):
      for l in unique_cell_dimensions:
        v = matrix.col(direction.dvec) * l
        f = compute_functional(v.elems)
        vectors.append(v.elems)
        function_values.append(f)
    perm = flex.sort_permutation(function_values, reverse=True)
    vectors = vectors.select(perm)
    function_values = function_values.select(perm)
  print("made search vecs")

  unique_vectors = []
  i = 0
  while len(unique_vectors) < N_UNIQUE_V:
    v = matrix.col(vectors[i])
    is_unique = True
    if i > 0:
      for v_u in unique_vectors:
        if v.length() < v_u.length():
          if is_approximate_integer_multiple(v, v_u):
            is_unique = False
            break
        elif is_approximate_integer_multiple(v_u, v):
          is_unique = False
          break
    if is_unique:
      unique_vectors.append(v)
    i += 1
  print("chose unique basis vecs")
  if self.params.debug:
    for i in range(N_UNIQUE_V):
      v = matrix.col(vectors[i])
      print v.elems, v.length(), function_values[i]

  basis_vectors = [v.elems for v in unique_vectors]
  self.candidate_basis_vectors = basis_vectors

  if self.params.optimise_initial_basis_vectors:
    self.params.optimise_initial_basis_vectors = False
    # todo: verify this reference to self.reciprocal_lattice_points is correct
    optimised_basis_vectors = optimise_basis_vectors(
      self.reciprocal_lattice_points, basis_vectors)
    optimised_function_values = flex.double([
      compute_functional(v) for v in optimised_basis_vectors])

    perm = flex.sort_permutation(optimised_function_values, reverse=True)
    optimised_basis_vectors = optimised_basis_vectors.select(perm)
    optimised_function_values = optimised_function_values.select(perm)

    unique_vectors = [matrix.col(v) for v in optimised_basis_vectors]

  print "Number of unique vectors: %i" % len(unique_vectors)

  if self.params.debug:
    for i in range(len(unique_vectors)):
      print compute_functional(unique_vectors[i].elems), unique_vectors[i].length(), unique_vectors[i].elems
    print

  crystal_models = []
  self.candidate_basis_vectors = unique_vectors

  if self.params.debug:
    self.debug_show_candidate_basis_vectors()
  if self.params.debug_plots:
    self.debug_plot_candidate_basis_vectors()

  candidate_orientation_matrices \
    = self.find_candidate_orientation_matrices(
      unique_vectors)
      # max_combinations=self.params.basis_vector_combinations.max_try)

  FILTER_BY_MAG = True
  if FILTER_BY_MAG:
    print("\n\n FILTERING BY MAG\n\n")
    FILTER_TOL = 10, 3  # within 10 percent of cell lengths and 3 degrees of cell angles
    target_uc = self.params.known_symmetry.unit_cell.parameters()
    good_mats = []
    for c in candidate_orientation_matrices:
      uc = c.get_unit_cell().parameters()
      comps = []
      for i in range(3):
        tol = 0.01 * FILTER_TOL[0] * target_uc[i]
        low = target_uc[i] - tol / 2.
        high = target_uc[i] + tol / 2.
        comps.append(low < uc[i] < high)
      for i in range(3, 6):
        low = target_uc[i] - FILTER_TOL[1]
        high = target_uc[i] + FILTER_TOL[1]
        comps.append(low < uc[i] < high)
      if all(comps):
        print("matrix is ok:", c)
        good_mats.append(c)
    print("\nFilter kept %d / %d mats" %
          (len(good_mats), len(candidate_orientation_matrices)))
    candidate_orientation_matrices = good_mats

  crystal_model, n_indexed = self.choose_best_orientation_matrix(
    candidate_orientation_matrices)
  if crystal_model is not None:
    crystal_models = [crystal_model]
  else:
    crystal_models = []

  #assert len(crystal_models) > 0

  candidate_orientation_matrices = crystal_models

  #for i in range(len(candidate_orientation_matrices)):
    #if self.target_symmetry_primitive is not None:
      ##print "symmetrizing model"
      ##self.target_symmetry_primitive.show_summary()
      #symmetrized_model = self.apply_symmetry(
        #candidate_orientation_matrices[i], self.target_symmetry_primitive)
      #candidate_orientation_matrices[i] = symmetrized_model

  self.candidate_crystal_models = candidate_orientation_matrices

  # memory leak somewhere... probably not here.. but just in case...
  del _x, _y, _z, _u_vecs, order, rec_pts, vecs, func_vals, vectors, function_values
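# compute_functional above evaluates sum_S cos(2*pi*S.v) one candidate
# vector at a time, while the numpy branch evaluates it for many candidates
# at once. A self-contained sketch of that vectorised form:
import numpy as np

def functional_many_vectors(rec_pts, vectors):
    # rec_pts: (N, 3) reciprocal lattice points; vectors: (M, 3) candidates
    return np.sum(np.cos(2.0 * np.pi * np.dot(rec_pts, np.asarray(vectors).T)),
                  axis=0)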
def estimate_resolution_limit_distl_method1(reflections, plot_filename=None):
    # Implementation of Method 1 (section 2.4.4) of:
    # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
    # J. Appl. Cryst. (2006). 39, 112-119
    # https://doi.org/10.1107/S0021889805040677

    variances = reflections["intensity.sum.variance"]

    sel = variances > 0
    reflections = reflections.select(sel)

    d_star_sq = flex.pow2(reflections["rlp"].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
    d_star_cubed = flex.pow(reflections["rlp"].norms(), 3)

    step = 2
    while len(reflections) / step > 40:
        step += 1

    order = flex.sort_permutation(d_spacings, reverse=True)

    ds3_subset = flex.double()
    d_subset = flex.double()
    for i in range(len(reflections) // step):
        ds3_subset.append(d_star_cubed[order[i * step]])
        d_subset.append(d_spacings[order[i * step]])

    x = flex.double(range(len(ds3_subset)))

    # (i)
    # Usually, Pm is the last point, that is, m = n. But m could be smaller than
    # n if an unusually high number of spots are detected around a certain
    # intermediate resolution. In that case, our search for the image resolution
    # does not go outside the spot 'bump'. This is particularly useful when
    # ice-rings are present.

    slopes = (ds3_subset[1:] - ds3_subset[0]) / (x[1:] - x[0])
    skip_first = 3
    p_m = flex.max_index(slopes[skip_first:]) + 1 + skip_first

    # (ii)

    x1 = matrix.col((0, ds3_subset[0]))
    x2 = matrix.col((p_m, ds3_subset[p_m]))

    gaps = flex.double([0])
    v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

    for i in range(1, p_m):
        x0 = matrix.col((i, ds3_subset[i]))
        r = x1 - x0
        g = abs(v.dot(r))
        gaps.append(g)

    mv = flex.mean_and_variance(gaps)
    s = mv.unweighted_sample_standard_deviation()

    # (iii)

    p_k = flex.max_index(gaps)
    g_k = gaps[p_k]
    p_g = p_k

    for i in range(p_k + 1, len(gaps)):
        g_i = gaps[i]
        if g_i > (g_k - 0.5 * s):
            p_g = i

    d_g = d_subset[p_g]

    noisiness = 0
    n = len(ds3_subset)
    for i in range(n - 1):
        for j in range(i + 1, n - 1):
            if slopes[i] >= slopes[j]:
                noisiness += 1
    noisiness /= (n - 1) * (n - 2) / 2

    if plot_filename is not None:
        from matplotlib import pyplot

        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(range(len(ds3_subset)), ds3_subset)
        ax.set_ylabel("D^-3")
        xlim = pyplot.xlim()
        ylim = pyplot.ylim()
        ax.vlines(p_g, ylim[0], ylim[1], colors="red")
        pyplot.xlim(0, xlim[1])
        pyplot.ylim(0, ylim[1])
        pyplot.savefig(plot_filename)
        pyplot.close()

    return d_g, noisiness
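# A compact check of the gap computation in step (ii) above: the gap at a
# point x0 is its perpendicular distance to the chord from x1 to x2, found
# by projecting (x1 - x0) onto the chord's unit normal v.
import numpy as np

x1, x2 = np.array([0.0, 0.0]), np.array([4.0, 0.0])
v = np.array([x2[1] - x1[1], -(x2[0] - x1[0])])
v /= np.linalg.norm(v)
x0 = np.array([2.0, 1.5])
assert abs(abs(v.dot(x1 - x0)) - 1.5) < 1e-12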
def integration_concept_detail(self, experiments, reflections, spots, image_number, cb_op_to_primitive, **kwargs): detector = experiments[0].detector crystal = experiments[0].crystal from cctbx.crystal import symmetry c_symmetry = symmetry(space_group=crystal.get_space_group(), unit_cell=crystal.get_unit_cell()) self.image_number = image_number NEAR = 10 pxlsz = detector[0].get_pixel_size() Predicted = self.get_predictions_accounting_for_centering( experiments, reflections, cb_op_to_primitive, **kwargs) FWMOSAICITY = self.inputai.getMosaicity() self.DOMAIN_SZ_ANG = kwargs.get("domain_size_ang", self.__dict__.get("actual", 0)) refineflag = {True: 0, False: 1}[kwargs.get("domain_size_ang", 0) == 0] c_symmetry.show_summary( prefix="EXCURSION%1d REPORT FWMOS= %6.4f DOMAIN= %6.1f " % (refineflag, FWMOSAICITY, self.DOMAIN_SZ_ANG)) from annlib_ext import AnnAdaptor self.cell = c_symmetry.unit_cell() query = flex.double() print len(self.predicted) for pred in self.predicted: # predicted spot coord in pixels query.append(pred[0] / pxlsz[0]) query.append(pred[1] / pxlsz[1]) self.reserve_hkllist_for_signal_search = self.hkllist reference = flex.double() assert self.length > NEAR # Can't do spot/pred matching with too few spots for spot in spots: reference.append(spot.ctr_mass_x()) reference.append(spot.ctr_mass_y()) IS_adapt = AnnAdaptor(data=reference, dim=2, k=NEAR) IS_adapt.query(query) idx_cutoff = float(min(self.mask_focus[image_number])) from rstbx.apps.slip_helpers import slip_callbacks cache_refinement_spots = getattr(slip_callbacks.slip_callback, "requires_refinement_spots", False) indexed_pairs_provisional = [] correction_vectors_provisional = [] c_v_p_flex = flex.vec3_double() this_setting_matched_indices = reflections["miller_index"] for j, item in enumerate(this_setting_matched_indices): this_setting_index = self.hkllist.first_index(item) if this_setting_index: Match = dict(spot=j, pred=this_setting_index) indexed_pairs_provisional.append(Match) vector = matrix.col([ reflections["xyzobs.px.value"][j][0] - self.predicted[Match["pred"]][0] / pxlsz[0], reflections["xyzobs.px.value"][j][1] - self.predicted[Match["pred"]][1] / pxlsz[1] ]) correction_vectors_provisional.append(vector) c_v_p_flex.append((vector[0], vector[1], 0.)) self.N_correction_vectors = len(correction_vectors_provisional) self.rmsd_px = math.sqrt(flex.mean(c_v_p_flex.dot(c_v_p_flex))) print "... %d provisional matches" % self.N_correction_vectors, print "r.m.s.d. in pixels: %6.3f" % (self.rmsd_px) if self.horizons_phil.integration.enable_residual_scatter: from matplotlib import pyplot as plt fig = plt.figure() for cv in correction_vectors_provisional: plt.plot([cv[1]], [-cv[0]], "r.") plt.title(" %d matches, r.m.s.d. %5.2f pixels" % (len(correction_vectors_provisional), math.sqrt(flex.mean(c_v_p_flex.dot(c_v_p_flex))))) plt.axes().set_aspect("equal") self.show_figure(plt, fig, "res") plt.close() if self.horizons_phil.integration.enable_residual_map: from matplotlib import pyplot as plt PX = reflections["xyzobs.px.value"] fig = plt.figure() for match, cv in zip(indexed_pairs_provisional, correction_vectors_provisional): plt.plot([PX[match["spot"]][1]], [-PX[match["spot"]][0]], "r.") plt.plot([self.predicted[match["pred"]][1] / pxlsz[1]], [-self.predicted[match["pred"]][0] / pxlsz[0]], "g.") plt.plot( [PX[match["spot"]][1], PX[match["spot"]][1] + 10. * cv[1]], [ -PX[match["spot"]][0], -PX[match["spot"]][0] - 10. 
* cv[0] ], 'r-')
      if kwargs.get("user-reentrant") != None and self.horizons_phil.integration.spot_prediction == "dials" \
         and self.horizons_phil.integration.enable_residual_map_deltapsi:
        from rstbx.apps.stills.util import residual_map_special_deltapsi_add_on
        residual_map_special_deltapsi_add_on(
          reflections=self.dials_spot_prediction,
          matches=indexed_pairs_provisional,
          experiments=experiments,
          hkllist=self.hkllist,
          predicted=self.predicted,
          plot=plt,
          eta_deg=FWMOSAICITY,
          deff=self.DOMAIN_SZ_ANG)
      plt.xlim([0, detector[0].get_image_size()[1]])
      plt.ylim([-detector[0].get_image_size()[0], 0])
      plt.title(" %d matches, r.m.s.d. %5.2f pixels" %
                (len(correction_vectors_provisional),
                 math.sqrt(flex.mean(c_v_p_flex.dot(c_v_p_flex)))))
      plt.axes().set_aspect("equal")
      self.show_figure(plt, fig, "map")
      plt.close()

  indexed_pairs = indexed_pairs_provisional
  correction_vectors = correction_vectors_provisional
  ########### skip outlier rejection for this derived class
  ### However must retain the ability to write out correction vectors.
  if True:  # at Aaron's request; test later
    correction_lengths = flex.double(
      [v.length() for v in correction_vectors_provisional])
    clorder = flex.sort_permutation(correction_lengths)
    sorted_cl = correction_lengths.select(clorder)

    indexed_pairs = []
    correction_vectors = []
    self.correction_vectors = []
    for icand in xrange(len(sorted_cl)):
      # somewhat arbitrary sigma = 1.0 cutoff for outliers
      indexed_pairs.append(indexed_pairs_provisional[clorder[icand]])
      correction_vectors.append(correction_vectors_provisional[clorder[icand]])
      if cache_refinement_spots:
        self.spotfinder.images[self.frame_numbers[
          self.image_number]]["refinement_spots"].append(
            spots[reflections[indexed_pairs[-1]["spot"]]['spotfinder_lookup']])
      if kwargs.get("verbose_cv") == True:
        print "CV OBSCENTER %7.2f %7.2f REFINEDCENTER %7.2f %7.2f" % (
          float(self.inputpd["size1"]) / 2., float(self.inputpd["size2"]) / 2.,
          self.inputai.xbeam() / pxlsz[0], self.inputai.ybeam() / pxlsz[1]),
        print "OBSSPOT %7.2f %7.2f PREDSPOT %7.2f %7.2f" % (
          reflections[indexed_pairs[-1]["spot"]]['xyzobs.px.value'][0],
          reflections[indexed_pairs[-1]["spot"]]['xyzobs.px.value'][1],
          self.predicted[indexed_pairs[-1]["pred"]][0] / pxlsz[0],
          self.predicted[indexed_pairs[-1]["pred"]][1] / pxlsz[1]),
        the_hkl = self.hkllist[indexed_pairs[-1]["pred"]]
        print "HKL %4d %4d %4d" % the_hkl, "%2d" % self.setting_id,
        radial, azimuthal = spots[indexed_pairs[-1][
          "spot"]].get_radial_and_azimuthal_size(
            self.inputai.xbeam() / pxlsz[0], self.inputai.ybeam() / pxlsz[1])
        print "RADIALpx %5.3f AZIMUTpx %5.3f" % (radial, azimuthal)

      # Store a list of correction vectors in self.
radial, azimuthal = spots[ indexed_pairs[-1]['spot']].get_radial_and_azimuthal_size( self.inputai.xbeam() / pxlsz[0], self.inputai.ybeam() / pxlsz[1]) self.correction_vectors.append( dict(obscenter=(float(self.inputpd['size1']) / 2, float(self.inputpd['size2']) / 2), refinedcenter=(self.inputai.xbeam() / pxlsz[0], self.inputai.ybeam() / pxlsz[1]), obsspot=(reflections[indexed_pairs[-1] ['spot']]['xyzobs.px.value'][0], reflections[indexed_pairs[-1] ['spot']]['xyzobs.px.value'][1]), predspot=( self.predicted[indexed_pairs[-1]['pred']][0] / pxlsz[0], self.predicted[indexed_pairs[-1]['pred']][1] / pxlsz[1]), hkl=(self.hkllist[indexed_pairs[-1]['pred']][0], self.hkllist[indexed_pairs[-1]['pred']][1], self.hkllist[indexed_pairs[-1]['pred']][2]), setting_id=self.setting_id, radial=radial, azimuthal=azimuthal)) self.inputpd["symmetry"] = c_symmetry self.inputpd["symmetry"].show_summary(prefix="SETTING ") if self.horizons_phil.integration.model == "user_supplied": # Not certain of whether the reentrant_* dictionary keys create a memory leak if kwargs.get("user-reentrant", None) == None: kwargs["reentrant_experiments"] = experiments kwargs["reentrant_reflections"] = reflections from cxi_user import post_outlier_rejection self.indexed_pairs = indexed_pairs self.spots = spots post_outlier_rejection(self, image_number, cb_op_to_primitive, self.horizons_phil, kwargs) return ########### finished with user-supplied code correction_lengths = flex.double( [v.length() for v in correction_vectors]) self.r_residual = pxlsz[0] * flex.mean(correction_lengths) #assert len(indexed_pairs)>NEAR # must have enough indexed spots if (len(indexed_pairs) <= NEAR): raise Sorry("Not enough indexed spots, only found %d, need %d" % (len(indexed_pairs), NEAR)) reference = flex.double() for item in indexed_pairs: reference.append(spots[item["spot"]].ctr_mass_x()) reference.append(spots[item["spot"]].ctr_mass_y()) PS_adapt = AnnAdaptor(data=reference, dim=2, k=NEAR) PS_adapt.query(query) self.BSmasks = [] # do not use null: self.null_correction_mapping( predicted=self.predicted, self.positional_correction_mapping( predicted=self.predicted, correction_vectors=correction_vectors, PS_adapt=PS_adapt, IS_adapt=IS_adapt, spots=spots) # which spots are close enough to interfere with background? 
MAXOVER = 6 OS_adapt = AnnAdaptor(data=query, dim=2, k=MAXOVER) #six near nbrs OS_adapt.query(query) if self.mask_focus[image_number] is None: raise Sorry( "No observed/predicted spot agreement; no Spotfinder masks; skip integration" ) nbr_cutoff = 2.0 * max(self.mask_focus[image_number]) FRAME = int(nbr_cutoff / 2) #print "The overlap cutoff is %d pixels"%nbr_cutoff nbr_cutoff_sq = nbr_cutoff * nbr_cutoff #print "Optimized C++ section...", self.set_frame(FRAME) self.set_background_factor(kwargs["background_factor"]) self.set_nbr_cutoff_sq(nbr_cutoff_sq) self.set_guard_width_sq(self.horizons_phil.integration.guard_width_sq) self.set_detector_gain(self.horizons_phil.integration.detector_gain) flex_sorted = flex.int() for item in self.sorted: flex_sorted.append(item[0]) flex_sorted.append(item[1]) if self.horizons_phil.integration.mask_pixel_value is not None: self.set_mask_pixel_val( self.horizons_phil.integration.mask_pixel_value) image_obj = self.imagefiles.imageindex( self.frame_numbers[self.image_number]) image_obj.read() rawdata = image_obj.linearintdata # assume image #1 if self.inputai.active_areas != None: self.detector_xy_draft = self.safe_background( rawdata=rawdata, predicted=self.predicted, OS_adapt=OS_adapt, sorted=flex_sorted, tiles=self.inputai.active_areas.IT, tile_id=self.inputai.active_areas.tile_id) else: self.detector_xy_draft = self.safe_background( rawdata=rawdata, predicted=self.predicted, OS_adapt=OS_adapt, sorted=flex_sorted) for i in xrange(len(self.predicted)): # loop over predicteds B_S_mask = {} keys = self.get_bsmask(i) for k in xrange(0, len(keys), 2): B_S_mask[(keys[k], keys[k + 1])] = True self.BSmasks.append(B_S_mask) #print "Done" return
def __call__(self): """Determine optimal mosaicity and domain size model (monochromatic)""" if self.refinery is None: RR = self.reflections else: RR = self.refinery.predict_for_reflection_table(self.reflections) all_crystals = [] self.nv_acceptance_flags = flex.bool(len(self.reflections["id"])) from dxtbx.model import MosaicCrystalSauter2014 for iid, experiment in enumerate(self.experiments): excursion_rad = RR["delpsical.rad"].select(RR["id"] == iid) delta_psi_deg = excursion_rad * 180.0 / math.pi logger.info("") logger.info("%s %s", flex.max(delta_psi_deg), flex.min(delta_psi_deg)) mean_excursion = flex.mean(delta_psi_deg) logger.info( "The mean excursion is %7.3f degrees, r.m.s.d %7.3f", mean_excursion, math.sqrt(flex.mean(RR["delpsical2"].select(RR["id"] == iid))), ) crystal = MosaicCrystalSauter2014(self.experiments[iid].crystal) self.experiments[iid].crystal = crystal beam = self.experiments[iid].beam miller_indices = self.reflections["miller_index"].select( self.reflections["id"] == iid) # FIXME XXX revise this formula so as to use a different wavelength potentially for each reflection two_thetas = crystal.get_unit_cell().two_theta( miller_indices, beam.get_wavelength(), deg=True) dspacings = crystal.get_unit_cell().d(miller_indices) # First -- try to get a reasonable envelope for the observed excursions. # minimum of three regions; maximum of 50 measurements in each bin logger.info("fitting parameters on %d spots", len(excursion_rad)) n_bins = min(max(3, len(excursion_rad) // 25), 50) bin_sz = len(excursion_rad) // n_bins logger.info("nbins %s bin_sz %s", n_bins, bin_sz) order = flex.sort_permutation(two_thetas) two_thetas_env = flex.double() dspacings_env = flex.double() excursion_rads_env = flex.double() for x in range(0, n_bins): subset = order[x * bin_sz:(x + 1) * bin_sz] two_thetas_env.append(flex.mean(two_thetas.select(subset))) dspacings_env.append(flex.mean(dspacings.select(subset))) excursion_rads_env.append( flex.max(flex.abs(excursion_rad.select(subset)))) # Second -- parameter fit # solve the normal equations sum_inv_u_sq = flex.sum(dspacings_env * dspacings_env) sum_inv_u = flex.sum(dspacings_env) sum_te_u = flex.sum(dspacings_env * excursion_rads_env) sum_te = flex.sum(excursion_rads_env) Normal_Mat = sqr( (sum_inv_u_sq, sum_inv_u, sum_inv_u, len(dspacings_env))) Vector = col((sum_te_u, sum_te)) solution = Normal_Mat.inverse() * Vector s_ang = 1.0 / (2 * solution[0]) logger.info("Best LSQ fit Scheerer domain size is %9.2f ang", s_ang) k_degrees = solution[1] * 180.0 / math.pi logger.info( "The LSQ full mosaicity is %8.5f deg; half-mosaicity %9.5f", 2 * k_degrees, k_degrees, ) from xfel.mono_simulation.max_like import minimizer # coerce the estimates to be positive for max-likelihood lower_limit_domain_size = ( math.pow(crystal.get_unit_cell().volume(), 1.0 / 3.0) * 3 ) # params.refinement.domain_size_lower_limit d_estimate = max(s_ang, lower_limit_domain_size) M = minimizer( d_i=dspacings, psi_i=excursion_rad, eta_rad=abs(2.0 * solution[1]), Deff=d_estimate, ) logger.info( "ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots", M.x[1] * 180.0 / math.pi, 2.0 / M.x[0], len(two_thetas), ) tan_phi_rad_ML = dspacings / (2.0 / M.x[0]) tan_phi_deg_ML = tan_phi_rad_ML * 180.0 / math.pi tan_outer_deg_ML = tan_phi_deg_ML + 0.5 * M.x[1] * 180.0 / math.pi # Only set the flags for those reflections that were indexed for this lattice self.nv_acceptance_flags.set_selected( self.reflections["id"] == iid, flex.abs(delta_psi_deg) < tan_outer_deg_ML, ) if ( self.graph_verbose ): # 
params.refinement.mosaic.enable_AD14F7B: # Excursion vs resolution fit AD1TF7B_MAX2T = 30.0 AD1TF7B_MAXDP = 1.0 from matplotlib import pyplot as plt plt.plot(two_thetas, delta_psi_deg, "bo") minplot = flex.min(two_thetas) plt.plot([0, minplot], [mean_excursion, mean_excursion], "k-") LR = flex.linear_regression(two_thetas, delta_psi_deg) model_y = LR.slope() * two_thetas + LR.y_intercept() plt.plot(two_thetas, model_y, "k-") plt.title( "ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots" % (M.x[1] * 180.0 / math.pi, 2.0 / M.x[0], len(two_thetas))) plt.plot(two_thetas, tan_phi_deg_ML, "r.") plt.plot(two_thetas, -tan_phi_deg_ML, "r.") plt.plot(two_thetas, tan_outer_deg_ML, "g.") plt.plot(two_thetas, -tan_outer_deg_ML, "g.") plt.xlim([0, AD1TF7B_MAX2T]) plt.ylim([-AD1TF7B_MAXDP, AD1TF7B_MAXDP]) plt.show() plt.close() from xfel.mono_simulation.util import green_curve_area self.green_curve_area = green_curve_area(two_thetas, tan_outer_deg_ML) logger.info("The green curve area is %s", self.green_curve_area) crystal.set_half_mosaicity_deg(M.x[1] * 180.0 / (2.0 * math.pi)) crystal.set_domain_size_ang(2.0 / M.x[0]) self._ML_full_mosaicity_rad = M.x[1] self._ML_domain_size_ang = 2.0 / M.x[0] # params.refinement.mosaic.model_expansion_factor """The expansion factor should be initially set to 1, then expanded so that the # reflections matched becomes as close as possible to # of observed reflections input, in the last integration call. Determine this by inspecting the output log file interactively. Do not exceed the bare minimum threshold needed. The intention is to find an optimal value, global for a given dataset.""" model_expansion_factor = 1.4 crystal.set_half_mosaicity_deg(crystal.get_half_mosaicity_deg() * model_expansion_factor) crystal.set_domain_size_ang(crystal.get_domain_size_ang() / model_expansion_factor) if (self.ewald_proximal_volume(iid) > self.params.indexing.stills.ewald_proximal_volume_max): raise DialsIndexError("Ewald proximity volume too high, %f" % self.ewald_proximal_volume(iid)) all_crystals.append(crystal) return all_crystals
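# The hand-rolled 2x2 normal-equations solve above (and in the print-based
# variant below) fits the envelope model excursion = m * d + k, where
# Deff = 1 / (2 * m) is the Scheerer domain size and k the half-mosaicity in
# radians. An equivalent least-squares sketch:
import numpy as np

def fit_envelope_sketch(dspacings_env, excursion_rads_env):
    d = np.asarray(dspacings_env, dtype=float)
    e = np.asarray(excursion_rads_env, dtype=float)
    A = np.column_stack([d, np.ones_like(d)])
    (m, k), _, _, _ = np.linalg.lstsq(A, e, rcond=None)
    return 1.0 / (2.0 * m), k  # (domain size in angstrom, half-mosaicity rad)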
def estimate_resolution_limit(reflections, ice_sel=None, plot_filename=None): if ice_sel is None: ice_sel = flex.bool(len(reflections), False) d_star_sq = flex.pow2(reflections["rlp"].norms()) d_spacings = uctbx.d_star_sq_as_d(d_star_sq) intensities = reflections["intensity.sum.value"] variances = reflections["intensity.sum.variance"] sel = variances > 0 intensities = intensities.select(sel) variances = variances.select(sel) ice_sel = ice_sel.select(sel) i_over_sigi = intensities / flex.sqrt(variances) log_i_over_sigi = flex.log(i_over_sigi) fit = flex.linear_regression(d_star_sq.select(~ice_sel), log_i_over_sigi.select(~ice_sel)) m = fit.slope() c = fit.y_intercept() log_i_sigi_lower = flex.double() d_star_sq_lower = flex.double() log_i_sigi_upper = flex.double() d_star_sq_upper = flex.double() binner = binner_equal_population(d_star_sq, target_n_per_bin=20, max_slots=20, min_slots=5) outliers_all = flex.bool(len(reflections), False) low_percentile_limit = 0.1 upper_percentile_limit = 1 - low_percentile_limit for i_slot, slot in enumerate(binner.bins): sel_all = (d_spacings < slot.d_max) & (d_spacings >= slot.d_min) sel = ~(ice_sel) & sel_all if sel.count(True) == 0: continue outliers = wilson_outliers(reflections.select(sel_all), ice_sel=ice_sel.select(sel_all)) outliers_all.set_selected(sel_all, outliers) isel = sel_all.iselection().select(~(outliers) & ~(ice_sel).select(sel_all)) log_i_over_sigi_sel = log_i_over_sigi.select(isel) d_star_sq_sel = d_star_sq.select(isel) perm = flex.sort_permutation(log_i_over_sigi_sel) i_lower = perm[int(math.floor(low_percentile_limit * len(perm)))] i_upper = perm[int(math.floor(upper_percentile_limit * len(perm)))] log_i_sigi_lower.append(log_i_over_sigi_sel[i_lower]) log_i_sigi_upper.append(log_i_over_sigi_sel[i_upper]) d_star_sq_upper.append(d_star_sq_sel[i_lower]) d_star_sq_lower.append(d_star_sq_sel[i_upper]) fit_upper = flex.linear_regression(d_star_sq_upper, log_i_sigi_upper) m_upper = fit_upper.slope() c_upper = fit_upper.y_intercept() fit_lower = flex.linear_regression(d_star_sq_lower, log_i_sigi_lower) m_lower = fit_lower.slope() c_lower = fit_lower.y_intercept() if m_upper == m_lower: intersection = (-1, -1) resolution_estimate = -1 inside = flex.bool(len(d_star_sq), False) else: # http://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Given_the_equations_of_the_lines # with: # a_ = m_upper # b_ = m_lower # c_ = c_upper # d_ = c_lower # intersection == ((d_ - c_) / (a_ - b_), (a_ * d_ - b_ * c_) / (a_ - b_)) intersection = ( (c_lower - c_upper) / (m_upper - m_lower), (m_upper * c_lower - m_lower * c_upper) / (m_upper - m_lower), ) inside = points_below_line(d_star_sq, log_i_over_sigi, m_upper, c_upper) inside = inside & ~outliers_all & ~ice_sel if inside.count(True) > 0: d_star_sq_estimate = flex.max(d_star_sq.select(inside)) resolution_estimate = uctbx.d_star_sq_as_d(d_star_sq_estimate) else: resolution_estimate = -1 if plot_filename is not None: from matplotlib import pyplot fig = pyplot.figure() ax = fig.add_subplot(1, 1, 1) ax.scatter(d_star_sq, log_i_over_sigi, marker="+") ax.scatter( d_star_sq.select(inside), log_i_over_sigi.select(inside), marker="+", color="green", ) ax.scatter( d_star_sq.select(ice_sel), log_i_over_sigi.select(ice_sel), marker="+", color="black", ) ax.scatter( d_star_sq.select(outliers_all), log_i_over_sigi.select(outliers_all), marker="+", color="grey", ) ax.scatter(d_star_sq_upper, log_i_sigi_upper, marker="+", color="red") ax.scatter(d_star_sq_lower, log_i_sigi_lower, marker="+", color="red") if 
intersection[0] <= ax.get_xlim( )[1] and intersection[1] <= ax.get_ylim()[1]: ax.scatter([intersection[0]], [intersection[1]], marker="x", s=50, color="b") xlim = pyplot.xlim() ax.plot(xlim, [(m * x + c) for x in xlim]) ax.plot(xlim, [(m_upper * x + c_upper) for x in xlim], color="red") ax.plot(xlim, [(m_lower * x + c_lower) for x in xlim], color="red") ax.set_xlabel("d_star_sq") ax.set_ylabel("ln(I/sigI)") ax.set_xlim((max(-xlim[1], -0.05), xlim[1])) ax.set_ylim((0, ax.get_ylim()[1])) for i_slot, slot in enumerate(binner.bins): if i_slot == 0: ax.vlines( uctbx.d_as_d_star_sq(slot.d_max), 0, ax.get_ylim()[1], linestyle="dotted", color="grey", ) ax.vlines( uctbx.d_as_d_star_sq(slot.d_min), 0, ax.get_ylim()[1], linestyle="dotted", color="grey", ) ax_ = ax.twiny() # ax2 is responsible for "top" axis and "right" axis xticks = ax.get_xticks() xticks_d = [ uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks ] ax_.set_xticks(xticks) ax_.set_xlim(ax.get_xlim()) ax_.set_xlabel(r"Resolution ($\AA$)") ax_.set_xticklabels(["%.1f" % d for d in xticks_d]) pyplot.savefig(plot_filename) pyplot.close() return resolution_estimate
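# A quick check of the line-intersection formula used above, for the lines
# y = m_upper * x + c_upper and y = m_lower * x + c_lower:
m_upper, c_upper, m_lower, c_lower = 2.0, 1.0, -1.0, 4.0
x = (c_lower - c_upper) / (m_upper - m_lower)
y = (m_upper * c_lower - m_lower * c_upper) / (m_upper - m_lower)
assert abs((m_upper * x + c_upper) - y) < 1e-12
assert abs((m_lower * x + c_lower) - y) < 1e-12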
def __call__(self): """Determine optimal mosaicity and domain size model (monochromatic)""" RR = self.refinery.predict_for_reflection_table(self.reflections) excursion_rad = RR["delpsical.rad"] delta_psi_deg = excursion_rad * 180./math.pi print print flex.max(delta_psi_deg), flex.min(delta_psi_deg) mean_excursion = flex.mean(delta_psi_deg) print "The mean excursion is %7.3f degrees, r.m.s.d %7.3f"%(mean_excursion, math.sqrt(flex.mean(RR["delpsical2"]))) crystal = self.experiments[0].crystal beam = self.experiments[0].beam miller_indices = self.reflections["miller_index"] # FIXME XXX revise this formula so as to use a different wavelength potentially for each reflection two_thetas = crystal.get_unit_cell().two_theta(miller_indices,beam.get_wavelength(),deg=True) dspacings = crystal.get_unit_cell().d(miller_indices) dspace_sq = dspacings * dspacings # First -- try to get a reasonable envelope for the observed excursions. ## minimum of three regions; maximum of 50 measurements in each bin print "fitting parameters on %d spots"%len(excursion_rad) n_bins = min(max(3, len(excursion_rad)//25),50) bin_sz = len(excursion_rad)//n_bins print "nbins",n_bins,"bin_sz",bin_sz order = flex.sort_permutation(two_thetas) two_thetas_env = flex.double() dspacings_env = flex.double() excursion_rads_env = flex.double() for x in xrange(0,n_bins): subset = order[x*bin_sz:(x+1)*bin_sz] two_thetas_env.append(flex.mean(two_thetas.select(subset))) dspacings_env.append(flex.mean(dspacings.select(subset))) excursion_rads_env.append(flex.max(flex.abs(excursion_rad.select(subset)))) # Second -- parameter fit ## solve the normal equations sum_inv_u_sq = flex.sum(dspacings_env * dspacings_env) sum_inv_u = flex.sum(dspacings_env) sum_te_u = flex.sum(dspacings_env * excursion_rads_env) sum_te = flex.sum(excursion_rads_env) Normal_Mat = sqr((sum_inv_u_sq, sum_inv_u, sum_inv_u, len(dspacings_env))) Vector = col((sum_te_u, sum_te)) solution = Normal_Mat.inverse() * Vector s_ang = 1./(2*solution[0]) print "Best LSQ fit Scheerer domain size is %9.2f ang"%( s_ang) tan_phi_rad = dspacings / (2. * s_ang) tan_phi_deg = tan_phi_rad * 180./math.pi k_degrees = solution[1]* 180./math.pi print "The LSQ full mosaicity is %8.5f deg; half-mosaicity %9.5f"%(2*k_degrees, k_degrees) tan_outer_deg = tan_phi_deg + k_degrees from xfel.mono_simulation.max_like import minimizer # coerce the estimates to be positive for max-likelihood lower_limit_domain_size = math.pow(crystal.get_unit_cell().volume(), 1./3.)*3 # params.refinement.domain_size_lower_limit d_estimate = max(s_ang, lower_limit_domain_size) M = minimizer(d_i = dspacings, psi_i = excursion_rad, eta_rad = abs(2. * solution[1]), Deff = d_estimate) print "ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots"%(M.x[1]*180./math.pi, 2./M.x[0], len(two_thetas)) tan_phi_rad_ML = dspacings / (2. / M.x[0]) tan_phi_deg_ML = tan_phi_rad_ML * 180./math.pi tan_outer_deg_ML = tan_phi_deg_ML + 0.5*M.x[1]*180./math.pi self.nv_acceptance_flags = flex.abs(delta_psi_deg) < tan_outer_deg_ML if self.graph_verbose: #params.refinement.mosaic.enable_AD14F7B: # Excursion vs resolution fit AD1TF7B_MAX2T = 30. AD1TF7B_MAXDP = 1. 
from matplotlib import pyplot as plt plt.plot(two_thetas, delta_psi_deg, "bo") minplot = flex.min(two_thetas) plt.plot([0,minplot],[mean_excursion,mean_excursion],"k-") LR = flex.linear_regression(two_thetas, delta_psi_deg) model_y = LR.slope()*two_thetas + LR.y_intercept() plt.plot(two_thetas, model_y, "k-") plt.title("ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots"%(M.x[1]*180./math.pi, 2./M.x[0], len(two_thetas))) plt.plot(two_thetas, tan_phi_deg_ML, "r.") plt.plot(two_thetas, -tan_phi_deg_ML, "r.") plt.plot(two_thetas, tan_outer_deg_ML, "g.") plt.plot(two_thetas, -tan_outer_deg_ML, "g.") plt.xlim([0,AD1TF7B_MAX2T]) plt.ylim([-AD1TF7B_MAXDP,AD1TF7B_MAXDP]) plt.show() plt.close() from xfel.mono_simulation.util import green_curve_area self.green_curve_area = green_curve_area(two_thetas, tan_outer_deg_ML) print "The green curve area is ", self.green_curve_area crystal._ML_half_mosaicity_deg = M.x[1]*180./(2.*math.pi) crystal._ML_domain_size_ang = 2./M.x[0] self._ML_full_mosaicity_rad = M.x[1] self._ML_domain_size_ang = 2./M.x[0] #params.refinement.mosaic.model_expansion_factor """The expansion factor should be initially set to 1, then expanded so that the # reflections matched becomes as close as possible to # of observed reflections input, in the last integration call. Determine this by inspecting the output log file interactively. Do not exceed the bare minimum threshold needed. The intention is to find an optimal value, global for a given dataset.""" model_expansion_factor = 1.4 crystal._ML_half_mosaicity_deg *= model_expansion_factor crystal._ML_domain_size_ang /= model_expansion_factor return crystal
def estimate_resolution_limit_distl_method1(
    reflections, imageset, ice_sel=None, plot_filename=None):

  # Implementation of Method 1 (section 2.4.4) of:
  # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
  # J. Appl. Cryst. (2006). 39, 112-119
  # http://dx.doi.org/10.1107/S0021889805040677

  if ice_sel is None:
    ice_sel = flex.bool(len(reflections), False)

  variances = reflections['intensity.sum.variance']

  sel = variances > 0
  variances = variances.select(sel)
  ice_sel = ice_sel.select(sel)
  reflections = reflections.select(sel)
  intensities = reflections['intensity.sum.value']
  d_star_sq = flex.pow2(reflections['rlp'].norms())
  d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
  d_star_cubed = flex.pow(reflections['rlp'].norms(), 3)

  step = 2
  while len(reflections)/step > 40:
    step += 1

  order = flex.sort_permutation(d_spacings, reverse=True)

  ds3_subset = flex.double()
  d_subset = flex.double()
  for i in range(len(reflections)//step):
    ds3_subset.append(d_star_cubed[order[i*step]])
    d_subset.append(d_spacings[order[i*step]])

  x = flex.double(range(len(ds3_subset)))

  # (i)
  # Usually, Pm is the last point, that is, m = n. But m could be smaller than
  # n if an unusually high number of spots are detected around a certain
  # intermediate resolution. In that case, our search for the image resolution
  # does not go outside the spot 'bump'. This is particularly useful when
  # ice-rings are present.

  slopes = (ds3_subset[1:] - ds3_subset[0])/(x[1:]-x[0])
  skip_first = 3
  p_m = flex.max_index(slopes[skip_first:]) + 1 + skip_first

  # (ii)

  from scitbx import matrix
  x1 = matrix.col((0, ds3_subset[0]))
  x2 = matrix.col((p_m, ds3_subset[p_m]))

  gaps = flex.double([0])
  v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

  for i in range(1, p_m):
    x0 = matrix.col((i, ds3_subset[i]))
    r = x1 - x0
    g = abs(v.dot(r))
    gaps.append(g)

  mv = flex.mean_and_variance(gaps)
  s = mv.unweighted_sample_standard_deviation()

  # (iii)

  p_k = flex.max_index(gaps)
  g_k = gaps[p_k]
  p_g = p_k

  for i in range(p_k+1, len(gaps)):
    g_i = gaps[i]
    if g_i > (g_k - 0.5 * s):
      p_g = i

  ds3_g = ds3_subset[p_g]
  d_g = d_subset[p_g]

  noisiness = 0
  n = len(ds3_subset)
  for i in range(n-1):
    for j in range(i+1, n-1):
      if slopes[i] >= slopes[j]:
        noisiness += 1
  noisiness /= ((n-1)*(n-2)/2)

  if plot_filename is not None:
    if pyplot is None:
      raise Sorry("matplotlib must be installed to generate a plot.")
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.scatter(range(len(ds3_subset)), ds3_subset)
    #ax.set_xlabel('')
    ax.set_ylabel('D^-3')
    xlim = pyplot.xlim()
    ylim = pyplot.ylim()
    ax.vlines(p_g, ylim[0], ylim[1], colors='red')
    pyplot.xlim(0, xlim[1])
    pyplot.ylim(0, ylim[1])
    pyplot.savefig(plot_filename)
    pyplot.close()

  return d_g, noisiness
def plot_one_model(self, nrow, out):
    fig = plt.subplot(self.gs[nrow * self.ncols])
    two_thetas = self.reduction.get_two_theta_deg()
    degrees = self.reduction.get_delta_psi_deg()
    if self.color_encoding == "conventional":
        positive = (self.reduction.i_sigi >= 0.)
        fig.plot(two_thetas.select(positive), degrees.select(positive), "bo")
        fig.plot(two_thetas.select(~positive), degrees.select(~positive), "r+")
    elif self.color_encoding == "I/sigma":
        positive = (self.reduction.i_sigi >= 0.)
        tt_selected = two_thetas.select(positive)
        dp_selected = degrees.select(positive)
        i_sigi_select = self.reduction.i_sigi.select(positive)
        order = flex.sort_permutation(i_sigi_select)
        tt_selected = tt_selected.select(order)
        dp_selected = dp_selected.select(order)
        i_sigi_selected = i_sigi_select.select(order)
        from matplotlib.colors import Normalize
        dnorm = Normalize()
        dcolors = i_sigi_selected.as_numpy_array()
        dnorm.autoscale(dcolors)
        N = len(dcolors)
        CMAP = plt.get_cmap("rainbow")
        if self.refined.get("partiality_array", None) is None:
            for n in xrange(N):
                fig.plot([tt_selected[n]], [dp_selected[n]],
                         color=CMAP(dnorm(dcolors[n])), marker=".",
                         markersize=10)
        else:
            partials = self.refined.get("partiality_array")
            partials_select = partials.select(positive)
            partials_selected = partials_select.select(order)
            assert len(partials) == len(positive)
            for n in xrange(N):
                fig.plot([tt_selected[n]], [dp_selected[n]],
                         color=CMAP(dnorm(dcolors[n])), marker=".",
                         markersize=20 * partials_selected[n])
                # change the markersize to indicate partiality.
        negative = (self.reduction.i_sigi < 0.)
        fig.plot(two_thetas.select(negative), degrees.select(negative), "r+",
                 linewidth=1)
    else:
        strong = (self.reduction.i_sigi >= 10.)
        positive = ((~strong) & (self.reduction.i_sigi >= 0.))
        negative = (self.reduction.i_sigi < 0.)
        assert (strong.count(True) + positive.count(True) +
                negative.count(True) == len(self.reduction.i_sigi))
        fig.plot(two_thetas.select(positive), degrees.select(positive), "bo")
        fig.plot(two_thetas.select(strong), degrees.select(strong),
                 marker='.', linestyle='None',
                 markerfacecolor='#00ee00', markersize=10)
        fig.plot(two_thetas.select(negative), degrees.select(negative), "r+")

    # indicate the imposed resolution filter
    wavelength = self.reduction.experiment.beam.get_wavelength()
    imposed_res_filter = self.reduction.get_imposed_res_filter(out)
    resolution_markers = [
        a for a in [imposed_res_filter, self.reduction.measurements.d_min()]
        if a is not None
    ]
    for RM in resolution_markers:
        two_th = (180. / math.pi) * 2. * math.asin(wavelength / (2. * RM))
        plt.plot([two_th, two_th],
                 [self.AD1TF7B_MAXDP * -0.8, self.AD1TF7B_MAXDP * 0.8], 'k-')
        plt.text(two_th, self.AD1TF7B_MAXDP * -0.9, "%4.2f" % RM)

    # indicate the linefit
    mean = flex.mean(degrees)
    minplot = flex.min(two_thetas)
    plt.plot([0, minplot], [mean, mean], "k-")
    LR = flex.linear_regression(two_thetas, degrees)
    model_y = LR.slope() * two_thetas + LR.y_intercept()
    plt.plot(two_thetas, model_y, "k-")

    # Now let's take care of the red and green lines.
    half_mosaic_rotation_deg = self.refined["half_mosaic_rotation_deg"]
    mosaic_domain_size_ang = self.refined["mosaic_domain_size_ang"]
    red_curve_domain_size_ang = self.refined.get(
        "red_curve_domain_size_ang", mosaic_domain_size_ang)
    a_step = self.AD1TF7B_MAX2T / 50.
    a_range = flex.double([a_step * x for x in xrange(1, 50)])  # domain two-theta array
    # Bragg law [d=L/2sinTH]
    d_spacing = (wavelength / (2. * flex.sin(math.pi * a_range / 360.)))
    # convert two_theta to a delta-psi. Formula for Deffective [Dpsi=d/2Deff]
    inner_phi_deg = flex.asin(
        (d_spacing / (2. * red_curve_domain_size_ang))) * (180. / math.pi)
    outer_phi_deg = flex.asin(
        (d_spacing / (2. * mosaic_domain_size_ang)) +
        half_mosaic_rotation_deg * math.pi / 180.) * (180. / math.pi)
    plt.title("ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots\n%s" % (
        2. * half_mosaic_rotation_deg, mosaic_domain_size_ang,
        len(two_thetas), os.path.basename(self.reduction.filename)))
    plt.plot(a_range, inner_phi_deg, "r-")
    plt.plot(a_range, -inner_phi_deg, "r-")
    plt.plot(a_range, outer_phi_deg, "g-")
    plt.plot(a_range, -outer_phi_deg, "g-")
    plt.xlim([0, self.AD1TF7B_MAX2T])
    plt.ylim([-self.AD1TF7B_MAXDP, self.AD1TF7B_MAXDP])

    # second plot shows histogram
    # NB: dp_selected and positive are defined in the "I/sigma" branch above;
    # this histogram assumes that colour encoding was used.
    fig = plt.subplot(self.gs[1 + nrow * self.ncols])
    plt.xlim([-self.AD1TF7B_MAXDP, self.AD1TF7B_MAXDP])
    nbins = 50
    n, bins, patches = plt.hist(
        dp_selected, nbins,
        range=(-self.AD1TF7B_MAXDP, self.AD1TF7B_MAXDP),
        weights=self.reduction.i_sigi.select(positive),
        normed=0, facecolor="orange", alpha=0.75)
    # ersatz "median": actually the 90th-percentile i_sigi point
    isi_positive = self.reduction.i_sigi.select(positive)
    isi_order = flex.sort_permutation(isi_positive)
    reordered = isi_positive.select(isi_order)
    isi_median = reordered[int(len(isi_positive) * 0.9)]
    isi_top_half_selection = (isi_positive > isi_median)
    n, bins, patches = plt.hist(
        dp_selected.select(isi_top_half_selection), nbins,
        range=(-self.AD1TF7B_MAXDP, self.AD1TF7B_MAXDP),
        weights=isi_positive.select(isi_top_half_selection),
        normed=0, facecolor="#ff0000", alpha=0.75)
    plt.xlabel("(degrees)")
    plt.title("Weighted histogram of Delta-psi")
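# The red and green envelopes plotted above follow the small-angle relations
# psi_inner(d) = asin(d / (2*Deff)) and psi_outer(d) = asin(d/(2*Deff) + eta/2),
# with Deff the relevant domain size and eta the full mosaic rotation. A quick
# numeric check of those formulas with made-up values:
import math

d = 2.0          # d-spacing, Angstrom
Deff = 500.0     # effective mosaic domain size, Angstrom
eta_deg = 0.1    # full mosaic rotation, degrees
inner_deg = math.degrees(math.asin(d / (2.0 * Deff)))
outer_deg = math.degrees(math.asin(d / (2.0 * Deff) + math.radians(eta_deg) / 2.0))
print("inner %.4f deg, outer %.4f deg" % (inner_deg, outer_deg))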
def run(self, flags, sequence=None, shoeboxes=None, **kwargs):
    from dials.algorithms.background.simple import Linear2dModeller
    modeller = Linear2dModeller()
    detector = sequence.get_detector()

    # sort shoeboxes by centroid z
    frame = shoeboxes.centroid_all().position_frame()
    perm = flex.sort_permutation(frame)
    shoeboxes = shoeboxes.select(perm)
    buffer_size = 1
    bg_plus_buffer = self.background_size + buffer_size

    t0 = time.time()
    for i, shoebox in enumerate(shoeboxes):
        if not flags[perm[i]]:
            continue
        panel = detector[shoebox.panel]
        max_x, max_y = panel.get_image_size()
        bbox = shoebox.bbox
        x1, x2, y1, y2, z1, z2 = bbox
        # expand the bbox with a background region around the spotfinder
        # shoebox; perhaps also should use a buffer zone between the shoebox
        # and the background region
        expanded_bbox = (
            max(0, x1 - bg_plus_buffer),
            min(max_x, x2 + bg_plus_buffer),
            max(0, y1 - bg_plus_buffer),
            min(max_y, y2 + bg_plus_buffer),
            z1,
            z2,
        )
        shoebox.bbox = expanded_bbox
    t1 = time.time()
    logger.info("Time expand_shoebox: %s" % (t1 - t0))

    rlist = flex.reflection_table()
    rlist["shoebox"] = shoeboxes
    rlist["shoebox"].allocate()
    rlist["panel"] = shoeboxes.panels()
    rlist["bbox"] = shoeboxes.bounding_boxes()
    rlist.extract_shoeboxes(sequence)

    shoeboxes = rlist["shoebox"]
    shoeboxes.flatten()

    for i, shoebox in enumerate(shoeboxes):
        if not flags[perm[i]]:
            continue
        panel = detector[shoebox.panel]
        trusted_range = panel.get_trusted_range()
        ex1, ex2, ey1, ey2, ez1, ez2 = shoebox.bbox
        data = shoebox.data
        mask = flex.bool(data.accessor(), False)
        for i_y, y in enumerate(range(ey1, ey2)):
            for i_x, x in enumerate(range(ex1, ex2)):
                value = data[0, i_y, i_x]
                if (y >= (ey1 + buffer_size) and y < (ey2 - buffer_size)
                        and x >= (ex1 + buffer_size) and x < (ex2 - buffer_size)):
                    mask[0, i_y, i_x] = False  # foreground
                elif value > trusted_range[0] and value < trusted_range[1]:
                    mask[0, i_y, i_x] = True  # background
        model = modeller.create(data.as_double(), mask)
        d, a, b = model.params()[:3]
        if abs(a) > self.gradient_cutoff or abs(b) > self.gradient_cutoff:
            flags[perm[i]] = False

    return flags
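# The bbox expansion above is just a clamped dilation in x/y. A standalone
# restatement of that step (pure Python, no DIALS types; the helper name
# expand_bbox_clamped is ours):
def expand_bbox_clamped(bbox, margin, image_size):
    """Grow (x1, x2, y1, y2, z1, z2) by `margin` pixels in x/y, clamped to the panel."""
    x1, x2, y1, y2, z1, z2 = bbox
    max_x, max_y = image_size
    return (max(0, x1 - margin), min(max_x, x2 + margin),
            max(0, y1 - margin), min(max_y, y2 + margin), z1, z2)

assert expand_bbox_clamped((2, 5, 0, 4, 0, 1), 3, (10, 10)) == (0, 8, 0, 7, 0, 1)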
def integration_concept_detail(self, experiments, reflections, spots,
                               image_number, cb_op_to_primitive, **kwargs):
    detector = experiments[0].detector
    crystal = experiments[0].crystal
    from cctbx.crystal import symmetry
    c_symmetry = symmetry(space_group=crystal.get_space_group(),
                          unit_cell=crystal.get_unit_cell())

    self.image_number = image_number
    NEAR = 10
    pxlsz = detector[0].get_pixel_size()

    Predicted = self.get_predictions_accounting_for_centering(
        experiments, reflections, cb_op_to_primitive, **kwargs)

    FWMOSAICITY = self.inputai.getMosaicity()
    self.DOMAIN_SZ_ANG = kwargs.get("domain_size_ang",
                                    self.__dict__.get("actual", 0))
    refineflag = {True: 0, False: 1}[kwargs.get("domain_size_ang", 0) == 0]
    c_symmetry.show_summary(
        prefix="EXCURSION%1d REPORT FWMOS= %6.4f DOMAIN= %6.1f " % (
            refineflag, FWMOSAICITY, self.DOMAIN_SZ_ANG))
    from annlib_ext import AnnAdaptor
    self.cell = c_symmetry.unit_cell()
    query = flex.double()
    print len(self.predicted)

    for pred in self.predicted:  # predicted spot coord in pixels
        query.append(pred[0]/pxlsz[0])
        query.append(pred[1]/pxlsz[1])
    self.reserve_hkllist_for_signal_search = self.hkllist

    reference = flex.double()
    assert self.length > NEAR  # can't do spot/pred matching with too few spots
    for spot in spots:
        reference.append(spot.ctr_mass_x())
        reference.append(spot.ctr_mass_y())

    IS_adapt = AnnAdaptor(data=reference, dim=2, k=NEAR)
    IS_adapt.query(query)

    idx_cutoff = float(min(self.mask_focus[image_number]))
    from rstbx.apps.slip_helpers import slip_callbacks
    cache_refinement_spots = getattr(slip_callbacks.slip_callback,
                                     "requires_refinement_spots", False)

    indexed_pairs_provisional = []
    correction_vectors_provisional = []
    c_v_p_flex = flex.vec3_double()
    this_setting_matched_indices = reflections["miller_index"]
    for j, item in enumerate(this_setting_matched_indices):
        this_setting_index = self.hkllist.first_index(item)
        # use an explicit None test: a bare truth test would wrongly skip index 0
        if this_setting_index is not None:
            Match = dict(spot=j, pred=this_setting_index)
            indexed_pairs_provisional.append(Match)
            vector = matrix.col(
                [reflections["xyzobs.px.value"][j][0] -
                 self.predicted[Match["pred"]][0]/pxlsz[0],
                 reflections["xyzobs.px.value"][j][1] -
                 self.predicted[Match["pred"]][1]/pxlsz[1]])
            correction_vectors_provisional.append(vector)
            c_v_p_flex.append((vector[0], vector[1], 0.))
    self.N_correction_vectors = len(correction_vectors_provisional)
    self.rmsd_px = math.sqrt(flex.mean(c_v_p_flex.dot(c_v_p_flex)))
    print "... %d provisional matches"%self.N_correction_vectors,
    print "r.m.s.d. in pixels: %6.3f"%(self.rmsd_px)

    if self.horizons_phil.integration.enable_residual_scatter:
        from matplotlib import pyplot as plt
        fig = plt.figure()
        for cv in correction_vectors_provisional:
            plt.plot([cv[1]], [-cv[0]], "r.")
        plt.title(" %d matches, r.m.s.d. %5.2f pixels" % (
            len(correction_vectors_provisional),
            math.sqrt(flex.mean(c_v_p_flex.dot(c_v_p_flex)))))
        plt.axes().set_aspect("equal")
        self.show_figure(plt, fig, "res")
        plt.close()

    if self.horizons_phil.integration.enable_residual_map:
        from matplotlib import pyplot as plt
        PX = reflections["xyzobs.px.value"]
        fig = plt.figure()
        for match, cv in zip(indexed_pairs_provisional,
                             correction_vectors_provisional):
            plt.plot([PX[match["spot"]][1]], [-PX[match["spot"]][0]], "r.")
            plt.plot([self.predicted[match["pred"]][1]/pxlsz[1]],
                     [-self.predicted[match["pred"]][0]/pxlsz[0]], "g.")
            plt.plot([PX[match["spot"]][1],
                      PX[match["spot"]][1] + 10.*cv[1]],
                     [-PX[match["spot"]][0],
                      -PX[match["spot"]][0] - 10.*cv[0]], 'r-')
        if kwargs.get("user-reentrant") is not None and \
           self.horizons_phil.integration.spot_prediction == "dials" and \
           self.horizons_phil.integration.enable_residual_map_deltapsi:
            from rstbx.apps.stills.util import residual_map_special_deltapsi_add_on
            residual_map_special_deltapsi_add_on(
                reflections=self.dials_spot_prediction,
                matches=indexed_pairs_provisional,
                experiments=experiments,
                hkllist=self.hkllist,
                predicted=self.predicted,
                plot=plt,
                eta_deg=FWMOSAICITY,
                deff=self.DOMAIN_SZ_ANG)
        plt.xlim([0, detector[0].get_image_size()[1]])
        plt.ylim([-detector[0].get_image_size()[0], 0])
        plt.title(" %d matches, r.m.s.d. %5.2f pixels" % (
            len(correction_vectors_provisional),
            math.sqrt(flex.mean(c_v_p_flex.dot(c_v_p_flex)))))
        plt.axes().set_aspect("equal")
        self.show_figure(plt, fig, "map")
        plt.close()

    indexed_pairs = indexed_pairs_provisional
    correction_vectors = correction_vectors_provisional
    ########### skip outlier rejection for this derived class
    ### However must retain the ability to write out correction vectors.
    if True:  # at Aaron's request; test later
        correction_lengths = flex.double(
            [v.length() for v in correction_vectors_provisional])
        clorder = flex.sort_permutation(correction_lengths)
        sorted_cl = correction_lengths.select(clorder)

        indexed_pairs = []
        correction_vectors = []
        self.correction_vectors = []
        for icand in xrange(len(sorted_cl)):
            # somewhat arbitrary sigma = 1.0 cutoff for outliers
            indexed_pairs.append(indexed_pairs_provisional[clorder[icand]])
            correction_vectors.append(
                correction_vectors_provisional[clorder[icand]])
            if cache_refinement_spots:
                self.spotfinder.images[
                    self.frame_numbers[self.image_number]][
                    "refinement_spots"].append(
                    spots[reflections[indexed_pairs[-1]["spot"]][
                        'spotfinder_lookup']])
            if kwargs.get("verbose_cv") == True:
                print "CV OBSCENTER %7.2f %7.2f REFINEDCENTER %7.2f %7.2f"%(
                    float(self.inputpd["size1"])/2.,
                    float(self.inputpd["size2"])/2.,
                    self.inputai.xbeam()/pxlsz[0],
                    self.inputai.ybeam()/pxlsz[1]),
                print "OBSSPOT %7.2f %7.2f PREDSPOT %7.2f %7.2f"%(
                    reflections[indexed_pairs[-1]["spot"]]['xyzobs.px.value'][0],
                    reflections[indexed_pairs[-1]["spot"]]['xyzobs.px.value'][1],
                    self.predicted[indexed_pairs[-1]["pred"]][0]/pxlsz[0],
                    self.predicted[indexed_pairs[-1]["pred"]][1]/pxlsz[1]),
                the_hkl = self.hkllist[indexed_pairs[-1]["pred"]]
                print "HKL %4d %4d %4d"%the_hkl, "%2d"%self.setting_id,
                radial, azimuthal = spots[indexed_pairs[-1][
                    "spot"]].get_radial_and_azimuthal_size(
                    self.inputai.xbeam()/pxlsz[0],
                    self.inputai.ybeam()/pxlsz[1])
                print "RADIALpx %5.3f AZIMUTpx %5.3f"%(radial, azimuthal)

            # Store a list of correction vectors in self.
            radial, azimuthal = spots[indexed_pairs[-1][
                'spot']].get_radial_and_azimuthal_size(
                self.inputai.xbeam()/pxlsz[0], self.inputai.ybeam()/pxlsz[1])
            self.correction_vectors.append(
                dict(obscenter=(float(self.inputpd['size1']) / 2,
                                float(self.inputpd['size2']) / 2),
                     refinedcenter=(self.inputai.xbeam() / pxlsz[0],
                                    self.inputai.ybeam() / pxlsz[1]),
                     obsspot=(reflections[indexed_pairs[-1]['spot']][
                                  'xyzobs.px.value'][0],
                              reflections[indexed_pairs[-1]['spot']][
                                  'xyzobs.px.value'][1]),
                     predspot=(self.predicted[indexed_pairs[-1]['pred']][0] /
                               pxlsz[0],
                               self.predicted[indexed_pairs[-1]['pred']][1] /
                               pxlsz[1]),
                     hkl=(self.hkllist[indexed_pairs[-1]['pred']][0],
                          self.hkllist[indexed_pairs[-1]['pred']][1],
                          self.hkllist[indexed_pairs[-1]['pred']][2]),
                     setting_id=self.setting_id,
                     radial=radial,
                     azimuthal=azimuthal))

    self.inputpd["symmetry"] = c_symmetry
    self.inputpd["symmetry"].show_summary(prefix="SETTING ")

    if self.horizons_phil.integration.model == "user_supplied":
        # Not certain of whether the reentrant_* dictionary keys create a memory leak
        if kwargs.get("user-reentrant", None) is None:
            kwargs["reentrant_experiments"] = experiments
            kwargs["reentrant_reflections"] = reflections
            from cxi_user import post_outlier_rejection
            self.indexed_pairs = indexed_pairs
            self.spots = spots
            post_outlier_rejection(self, image_number, cb_op_to_primitive,
                                   self.horizons_phil, kwargs)
            return
    ########### finished with user-supplied code

    correction_lengths = flex.double([v.length() for v in correction_vectors])
    self.r_residual = pxlsz[0]*flex.mean(correction_lengths)

    #assert len(indexed_pairs)>NEAR # must have enough indexed spots
    if (len(indexed_pairs) <= NEAR):
        raise Sorry("Not enough indexed spots, only found %d, need %d" % (
            len(indexed_pairs), NEAR))

    reference = flex.double()
    for item in indexed_pairs:
        reference.append(spots[item["spot"]].ctr_mass_x())
        reference.append(spots[item["spot"]].ctr_mass_y())

    PS_adapt = AnnAdaptor(data=reference, dim=2, k=NEAR)
    PS_adapt.query(query)

    self.BSmasks = []
    # do not use null: self.null_correction_mapping(predicted=self.predicted, ...)
    self.positional_correction_mapping(
        predicted=self.predicted,
        correction_vectors=correction_vectors,
        PS_adapt=PS_adapt,
        IS_adapt=IS_adapt,
        spots=spots)

    # which spots are close enough to interfere with background?
    MAXOVER = 6
    OS_adapt = AnnAdaptor(data=query, dim=2, k=MAXOVER)  # six near nbrs
    OS_adapt.query(query)
    if self.mask_focus[image_number] is None:
        raise Sorry("No observed/predicted spot agreement; no Spotfinder masks; skip integration")
    nbr_cutoff = 2.0 * max(self.mask_focus[image_number])
    FRAME = int(nbr_cutoff/2)
    #print "The overlap cutoff is %d pixels"%nbr_cutoff
    nbr_cutoff_sq = nbr_cutoff * nbr_cutoff

    #print "Optimized C++ section...",
    self.set_frame(FRAME)
    self.set_background_factor(kwargs["background_factor"])
    self.set_nbr_cutoff_sq(nbr_cutoff_sq)
    self.set_guard_width_sq(self.horizons_phil.integration.guard_width_sq)
    self.set_detector_gain(self.horizons_phil.integration.detector_gain)
    flex_sorted = flex.int()
    for item in self.sorted:
        flex_sorted.append(item[0])
        flex_sorted.append(item[1])

    if self.horizons_phil.integration.mask_pixel_value is not None:
        self.set_mask_pixel_val(self.horizons_phil.integration.mask_pixel_value)

    image_obj = self.imagefiles.imageindex(self.frame_numbers[self.image_number])
    image_obj.read()
    rawdata = image_obj.linearintdata  # assume image #1

    if self.inputai.active_areas is not None:
        self.detector_xy_draft = self.safe_background(
            rawdata=rawdata,
            predicted=self.predicted,
            OS_adapt=OS_adapt,
            sorted=flex_sorted,
            tiles=self.inputai.active_areas.IT,
            tile_id=self.inputai.active_areas.tile_id)
    else:
        self.detector_xy_draft = self.safe_background(
            rawdata=rawdata,
            predicted=self.predicted,
            OS_adapt=OS_adapt,
            sorted=flex_sorted)

    for i in xrange(len(self.predicted)):  # loop over predicteds
        B_S_mask = {}
        keys = self.get_bsmask(i)
        for k in xrange(0, len(keys), 2):
            B_S_mask[(keys[k], keys[k+1])] = True
        self.BSmasks.append(B_S_mask)
    #print "Done"
    return
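# The spot/prediction matching above relies on the approximate-nearest-
# neighbour pattern used throughout this file: build an AnnAdaptor over one
# flattened (x0, y0, x1, y1, ...) coordinate list, query it with another, and
# read the paired .nn / .distances arrays (distances are squared, hence the
# flex.sqrt elsewhere). A minimal standalone sketch with toy coordinates:
from annlib_ext import AnnAdaptor
from scitbx.array_family import flex

reference = flex.double([0.0, 0.0, 10.0, 10.0])   # two 2D reference points
query = flex.double([9.0, 9.5])                   # one 2D query point
ann = AnnAdaptor(data=reference, dim=2, k=1)
ann.query(query)
print(list(ann.nn), list(ann.distances))          # nearest index and squared distance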
def run_with_preparsed(self, params, options):
    """Run combine_experiments, but allow passing in of parameters"""
    from dials.util.options import flatten_experiments

    # Try to load the models and data
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging
            if (params.reference_from_experiment.average_hierarchy_level is None
                    or depth ==
                    params.reference_from_experiment.average_hierarchy_level):
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthogonalize the slow and the fast vectors by rotating
                # around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)

            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(target_pg,
                                      [pg[i] for pg in panelgroups],
                                      depth + 1)

        ref_detector = flat_exps[0].detector
        average_detectors(ref_detector.hierarchy(),
                          [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(
        beam=ref_beam,
        goniometer=ref_goniometer,
        scan=ref_scan,
        crystal=ref_crystal,
        detector=ref_detector,
        params=params,
    )

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    skipped_expts = 0
    from dxtbx.model.experiment_list import ExperimentList
    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            sel = refs["id"] == i
            sub_ref = refs.select(sel)
            n_sub_ref = len(sub_ref)
            if (params.output.min_reflections_per_experiment is not None
                    and n_sub_ref <
                    params.output.min_reflections_per_experiment):
                skipped_expts += 1
                continue
            nrefs_per_exp.append(n_sub_ref)
            sub_ref["id"] = flex.int(len(sub_ref), global_id)
            if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                del sub_ref["shoebox"]
            reflections.extend(sub_ref)
            try:
                experiments.append(combine(exp))
            except ComparisonError as e:
                # When we failed tolerance checks, give a useful error message
                (path, index) = find_experiment_in(exp, params.input.experiments)
                raise Sorry(
                    "Model didn't match reference within required tolerance for experiment {} in {}:"
                    "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                    .format(index, path, str(e)))
            global_id += 1

    if (params.output.min_reflections_per_experiment is not None
            and skipped_expts > 0):
        print("Removed {0} experiments with fewer than {1} reflections".format(
            skipped_expts, params.output.min_reflections_per_experiment))

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Number of reflections"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if (params.output.n_subset is not None
            and len(experiments) > params.output.n_subset):
        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        if params.output.n_subset_method == "random":
            n_picked = 0
            indices = list(range(len(experiments)))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            print("Selecting a random subset of {0} experiments out of {1} total."
                  .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "n_refl":
            if params.output.n_refl_panel_list is None:
                refls_subset = reflections
            else:
                sel = flex.bool(len(reflections), False)
                for p in params.output.n_refl_panel_list:
                    sel |= reflections["panel"] == p
                refls_subset = reflections.select(sel)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
            print("Selecting a subset of {0} experiments with highest number of reflections out of {1} total."
                  .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "significance_filter":
            from dials.algorithms.integration.stills_significance_filter import (
                SignificanceFilter,
            )
            params.output.significance_filter.enable = True
            sig_filter = SignificanceFilter(params.output)
            refls_subset = sig_filter(experiments, reflections)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
        experiments = subset_exp
        reflections = subset_refls

    def save_in_batches(experiments, reflections, exp_name, refl_name,
                        batch_size=1000):
        from dxtbx.command_line.image_average import splitit
        for i, indices in enumerate(
                splitit(list(range(len(experiments))),
                        (len(experiments) // batch_size) + 1)):
            batch_expts = ExperimentList()
            batch_refls = flex.reflection_table()
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(reflections["id"] == sub_idx)
                sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
            exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
            ref_filename = os.path.splitext(refl_name)[0] + "_%03d.refl" % i
            self._save_output(batch_expts, batch_refls, exp_filename,
                              ref_filename)

    def combine_in_clusters(experiments_l, reflections_l, exp_name, refl_name,
                            end_count):
        result = []
        for cluster, experiment in enumerate(experiments_l):
            cluster_expts = ExperimentList()
            cluster_refls = flex.reflection_table()
            for i, expts in enumerate(experiment):
                refls = reflections_l[cluster][i]
                refls["id"] = flex.int(len(refls), i)
                cluster_expts.append(expts)
                cluster_refls.extend(refls)
            exp_filename = os.path.splitext(exp_name)[0] + (
                "_cluster%d.expt" % (end_count - cluster))
            ref_filename = os.path.splitext(refl_name)[0] + (
                "_cluster%d.refl" % (end_count - cluster))
            result.append(
                (cluster_expts, cluster_refls, exp_filename, ref_filename))
        return result

    # cluster the resulting experiments if requested
    if params.clustering.use:
        clustered = Cluster(
            experiments,
            reflections,
            dendrogram=params.clustering.dendrogram,
            threshold=params.clustering.threshold,
            n_max=params.clustering.max_crystals,
        )
        n_clusters = len(clustered.clustered_frames)

        def not_too_many(keeps):
            if params.clustering.max_clusters is not None:
                return len(keeps) < params.clustering.max_clusters
            return True

        keep_frames = []
        sorted_keys = sorted(clustered.clustered_frames.keys())
        while len(clustered.clustered_frames) > 0 and not_too_many(keep_frames):
            keep_frames.append(
                clustered.clustered_frames.pop(sorted_keys.pop(-1)))

        if params.clustering.exclude_single_crystal_clusters:
            keep_frames = [k for k in keep_frames if len(k) > 1]
        clustered_experiments = [[f.experiment for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        clustered_reflections = [[f.reflections for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        list_of_combined = combine_in_clusters(
            clustered_experiments,
            clustered_reflections,
            params.output.experiments_filename,
            params.output.reflections_filename,
            n_clusters,
        )
        for saveable_tuple in list_of_combined:
            if params.output.max_batch_size is None:
                self._save_output(*saveable_tuple)
            else:
                save_in_batches(*saveable_tuple,
                                batch_size=params.output.max_batch_size)
    else:
        if params.output.max_batch_size is None:
            self._save_output(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
            )
        else:
            save_in_batches(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                batch_size=params.output.max_batch_size,
            )
    return
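# save_in_batches above delegates the index chunking to
# dxtbx.command_line.image_average.splitit. A minimal pure-Python sketch of
# that chunking idea (the helper name chunk_indices is ours, and splitit's
# exact chunk sizes may differ):
def chunk_indices(indices, n_chunks):
    """Split a list into n_chunks nearly equal contiguous chunks."""
    k, m = divmod(len(indices), n_chunks)
    return [indices[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(n_chunks)]

assert chunk_indices(list(range(7)), 3) == [[0, 1, 2], [3, 4], [5, 6]]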
def __call__(self):
    """Determine optimal mosaicity and domain size model (monochromatic)"""
    RR = self.refinery.predict_for_reflection_table(self.reflections)
    excursion_rad = RR["delpsical.rad"]
    delta_psi_deg = excursion_rad * 180. / math.pi
    print
    print flex.max(delta_psi_deg), flex.min(delta_psi_deg)
    mean_excursion = flex.mean(delta_psi_deg)
    print "The mean excursion is %7.3f degrees, r.m.s.d %7.3f" % (
        mean_excursion, math.sqrt(flex.mean(RR["delpsical2"])))

    crystal = self.experiments[0].crystal
    beam = self.experiments[0].beam
    miller_indices = self.reflections["miller_index"]

    # FIXME XXX revise this formula so as to use a different wavelength
    # potentially for each reflection
    two_thetas = crystal.get_unit_cell().two_theta(
        miller_indices, beam.get_wavelength(), deg=True)
    dspacings = crystal.get_unit_cell().d(miller_indices)
    dspace_sq = dspacings * dspacings

    # First -- try to get a reasonable envelope for the observed excursions.
    ## minimum of three regions; maximum of 50 measurements in each bin
    print "fitting parameters on %d spots" % len(excursion_rad)
    n_bins = min(max(3, len(excursion_rad) // 25), 50)
    bin_sz = len(excursion_rad) // n_bins
    print "nbins", n_bins, "bin_sz", bin_sz
    order = flex.sort_permutation(two_thetas)
    two_thetas_env = flex.double()
    dspacings_env = flex.double()
    excursion_rads_env = flex.double()
    for x in xrange(0, n_bins):
        subset = order[x * bin_sz:(x + 1) * bin_sz]
        two_thetas_env.append(flex.mean(two_thetas.select(subset)))
        dspacings_env.append(flex.mean(dspacings.select(subset)))
        excursion_rads_env.append(
            flex.max(flex.abs(excursion_rad.select(subset))))

    # Second -- parameter fit
    ## solve the normal equations
    sum_inv_u_sq = flex.sum(dspacings_env * dspacings_env)
    sum_inv_u = flex.sum(dspacings_env)
    sum_te_u = flex.sum(dspacings_env * excursion_rads_env)
    sum_te = flex.sum(excursion_rads_env)
    Normal_Mat = sqr((sum_inv_u_sq, sum_inv_u, sum_inv_u, len(dspacings_env)))
    Vector = col((sum_te_u, sum_te))
    solution = Normal_Mat.inverse() * Vector
    s_ang = 1. / (2 * solution[0])
    print "Best LSQ fit Scheerer domain size is %9.2f ang" % (s_ang)

    tan_phi_rad = dspacings / (2. * s_ang)
    tan_phi_deg = tan_phi_rad * 180. / math.pi
    k_degrees = solution[1] * 180. / math.pi
    print "The LSQ full mosaicity is %8.5f deg; half-mosaicity %9.5f" % (
        2 * k_degrees, k_degrees)
    tan_outer_deg = tan_phi_deg + k_degrees

    from xfel.mono_simulation.max_like import minimizer
    # coerce the estimates to be positive for max-likelihood
    lower_limit_domain_size = math.pow(
        crystal.get_unit_cell().volume(),
        1. / 3.) * 3  # params.refinement.domain_size_lower_limit
    d_estimate = max(s_ang, lower_limit_domain_size)
    M = minimizer(d_i=dspacings, psi_i=excursion_rad,
                  eta_rad=abs(2. * solution[1]), Deff=d_estimate)
    print "ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots" % (
        M.x[1] * 180. / math.pi, 2. / M.x[0], len(two_thetas))
    tan_phi_rad_ML = dspacings / (2. / M.x[0])
    tan_phi_deg_ML = tan_phi_rad_ML * 180. / math.pi
    tan_outer_deg_ML = tan_phi_deg_ML + 0.5 * M.x[1] * 180. / math.pi

    self.nv_acceptance_flags = flex.abs(delta_psi_deg) < tan_outer_deg_ML

    if self.graph_verbose:  # params.refinement.mosaic.enable_AD14F7B: Excursion vs resolution fit
        AD1TF7B_MAX2T = 30.
        AD1TF7B_MAXDP = 1.
        from matplotlib import pyplot as plt
        plt.plot(two_thetas, delta_psi_deg, "bo")
        minplot = flex.min(two_thetas)
        plt.plot([0, minplot], [mean_excursion, mean_excursion], "k-")
        LR = flex.linear_regression(two_thetas, delta_psi_deg)
        model_y = LR.slope() * two_thetas + LR.y_intercept()
        plt.plot(two_thetas, model_y, "k-")
        plt.title("ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots" % (
            M.x[1] * 180. / math.pi, 2. / M.x[0], len(two_thetas)))
        plt.plot(two_thetas, tan_phi_deg_ML, "r.")
        plt.plot(two_thetas, -tan_phi_deg_ML, "r.")
        plt.plot(two_thetas, tan_outer_deg_ML, "g.")
        plt.plot(two_thetas, -tan_outer_deg_ML, "g.")
        plt.xlim([0, AD1TF7B_MAX2T])
        plt.ylim([-AD1TF7B_MAXDP, AD1TF7B_MAXDP])
        plt.show()
        plt.close()

    from xfel.mono_simulation.util import green_curve_area
    self.green_curve_area = green_curve_area(two_thetas, tan_outer_deg_ML)
    print "The green curve area is ", self.green_curve_area

    crystal._ML_half_mosaicity_deg = M.x[1] * 180. / (2. * math.pi)
    crystal._ML_domain_size_ang = 2. / M.x[0]
    self._ML_full_mosaicity_rad = M.x[1]
    self._ML_domain_size_ang = 2. / M.x[0]

    # params.refinement.mosaic.model_expansion_factor
    """The expansion factor should be initially set to 1, then expanded so
    that the # reflections matched becomes as close as possible to # of
    observed reflections input, in the last integration call. Determine this
    by inspecting the output log file interactively. Do not exceed the bare
    minimum threshold needed. The intention is to find an optimal value,
    global for a given dataset."""
    model_expansion_factor = 1.4
    crystal._ML_half_mosaicity_deg *= model_expansion_factor
    crystal._ML_domain_size_ang /= model_expansion_factor
    return crystal
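# The "Second -- parameter fit" step above solves, in closed form, the 2x2
# normal equations for the straight-line envelope psi_env(d) = m*d + k, where
# m = 1/(2*Deff) and k is the half-mosaicity. A standalone restatement with
# plain floats (toy data; no scitbx types; the helper name fit_envelope is ours):
def fit_envelope(d, psi):
    """Least-squares fit psi ~= m*d + k; returns (Deff, k)."""
    n = len(d)
    s_dd = sum(x * x for x in d)
    s_d = sum(d)
    s_pd = sum(x * y for x, y in zip(d, psi))
    s_p = sum(psi)
    det = s_dd * n - s_d * s_d
    m = (n * s_pd - s_d * s_p) / det
    k = (s_dd * s_p - s_d * s_pd) / det
    return 1.0 / (2.0 * m), k

Deff, k = fit_envelope([1.0, 2.0, 3.0], [0.011, 0.021, 0.031])
print("Deff %.1f A, half-mosaicity %.4f rad" % (Deff, k))  # Deff 50.0 A, k 0.0010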
def abs_bounding_lines_in_mm(self, detector):
    """Return bounding lines of kapton"""
    # first get bounding directions from detector:
    detz = flex.mean(flex.double([panel.get_origin()[2] for panel in detector]))
    edges = []
    for ii, panel in enumerate(detector):
        f_size, s_size = panel.get_image_size()
        for point in [(0, 0), (0, s_size), (f_size, 0), (f_size, s_size)]:
            x, y = panel.get_pixel_lab_coord(point)[0:2]
            edges.append((x, y, detz))
    # Use the idea that the corners of the detector are end points of the
    # diagonal and will be the top 2 max dimension among all end points
    dlist = flex.double()
    dlist_idx = []
    n_edges = len(edges)
    for ii in range(n_edges - 1):
        for jj in range(ii + 1, n_edges):
            pt_1 = col(edges[ii])
            pt_2 = col(edges[jj])
            distance = (pt_1 - pt_2).length()
            dlist.append(distance)
            dlist_idx.append((ii, jj))
    sorted_idx = flex.sort_permutation(dlist, reverse=True)
    edge_pts = [
        edges[dlist_idx[sorted_idx[0]][0]],
        edges[dlist_idx[sorted_idx[1]][0]],
        edges[dlist_idx[sorted_idx[0]][1]],
        edges[dlist_idx[sorted_idx[1]][1]],
    ]
    self.detector_edges = edge_pts
    # Now get the maximum extent of the intersection of the rays with the detector
    all_ints = []
    kapton_path_list = []
    for ii, edge_point in enumerate(self.edge_points):
        s1 = edge_point.normalize()
        kapton_path_mm = self.get_kapton_path_mm(s1)
        for panel in detector:
            try:
                x_int, y_int = panel.get_lab_coord(
                    panel.get_ray_intersection(s1))[0:2]
            except RuntimeError:
                pass
            int_point = (x_int, y_int, detz)
            # Arbitrary tolerance of a couple of pixels, otherwise these
            # points were getting clustered together
            tolerance = min(panel.get_pixel_size()) * 2.0
            if (sum((col(trial_pt) - col(int_point)).length() <= tolerance
                    for trial_pt in all_ints) == 0):
                all_ints.append(int_point)
                kapton_path_list.append(kapton_path_mm)
    # Use the idea that the extreme edges of the intersection points are end
    # points of the diagonal and will be the top 2 max dimension among all
    # end points
    dlist = flex.double()
    dlist_idx = []
    n_edges = len(all_ints)
    for ii in range(n_edges - 1):
        pt_1 = col(all_ints[ii])
        for jj in range(ii + 1, n_edges):
            pt_2 = col(all_ints[jj])
            distance = (pt_1 - pt_2).length()
            dlist.append(distance)
            dlist_idx.append((ii, jj))
    sorted_idx = flex.sort_permutation(dlist, reverse=True)
    int_edge_pts = [
        all_ints[dlist_idx[sorted_idx[0]][0]],
        all_ints[dlist_idx[sorted_idx[1]][0]],
        all_ints[dlist_idx[sorted_idx[0]][1]],
        all_ints[dlist_idx[sorted_idx[1]][1]],
    ]
    # Sort out the edge points and the int_edge_points which are on the same side
    kapton_edge_1 = (col(int_edge_pts[0]) - col(int_edge_pts[1])).normalize()
    kapton_edge_2 = (col(int_edge_pts[2]) - col(int_edge_pts[3])).normalize()
    min_loss_func = -999.9  # tracks the best (largest) alignment score
    edge_idx = None
    for edge_idx_combo in [(0, 1, 2, 3), (0, 3, 1, 2)]:
        side_1 = (col(edge_pts[edge_idx_combo[0]]) -
                  col(edge_pts[edge_idx_combo[1]])).normalize()
        side_2 = (col(edge_pts[edge_idx_combo[2]]) -
                  col(edge_pts[edge_idx_combo[3]])).normalize()
        loss_func = abs(kapton_edge_1.dot(side_1)) + abs(kapton_edge_2.dot(side_2))
        if loss_func > min_loss_func:
            edge_idx = edge_idx_combo
            min_loss_func = loss_func
    # Make sure the edges of the detector and the kapton are in the same
    # orientation; first for kapton edge 1
    side_1 = (col(edge_pts[edge_idx[0]]) - col(edge_pts[edge_idx[1]])).normalize()
    side_2 = (col(edge_pts[edge_idx[2]]) - col(edge_pts[edge_idx[3]])).normalize()
    v1 = kapton_edge_1.dot(side_1)
    v2 = kapton_edge_2.dot(side_2)
    if v1 < 0.0:
        edge_idx = (edge_idx[1], edge_idx[0], edge_idx[2], edge_idx[3])
    if v2 < 0.0:
        edge_idx = (edge_idx[0], edge_idx[1], edge_idx[3], edge_idx[2])
    # Now make sure the edges and the kapton lines are on the right side
    # (i.e. not swapped). Look at edge_idx[0:2], i.e. the first edge of the
    # detector parallel to the kapton
    pt1 = edge_pts[edge_idx[0]]
    pt2 = edge_pts[edge_idx[1]]
    # Now find the distance between each of these points and the kapton lines.
    d1_kapton_1 = self.distance_of_point_from_line(
        pt1, int_edge_pts[0], int_edge_pts[1])
    d1_kapton_2 = self.distance_of_point_from_line(
        pt1, int_edge_pts[2], int_edge_pts[3])
    d2_kapton_1 = self.distance_of_point_from_line(
        pt2, int_edge_pts[0], int_edge_pts[1])
    d2_kapton_2 = self.distance_of_point_from_line(
        pt2, int_edge_pts[2], int_edge_pts[3])
    if d1_kapton_1 < d1_kapton_2:  # closer to max than edge
        assert d2_kapton_1 < d2_kapton_2, \
            "Distance mismatch. Edge of detector might be on wrong side of kapton tape ... please check"
        pair_values = [
            (edge_pts[edge_idx[0]], edge_pts[edge_idx[1]],
             edge_pts[edge_idx[2]], edge_pts[edge_idx[3]]),
            (int_edge_pts[0], int_edge_pts[1], int_edge_pts[2], int_edge_pts[3]),
        ]
    else:
        pair_values = [
            (edge_pts[edge_idx[0]], edge_pts[edge_idx[1]],
             edge_pts[edge_idx[2]], edge_pts[edge_idx[3]]),
            (int_edge_pts[2], int_edge_pts[3], int_edge_pts[0], int_edge_pts[1]),
        ]
    return pair_values
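# distance_of_point_from_line is assumed to be defined elsewhere on this
# class. A minimal 2D sketch of what such a helper plausibly computes (the
# perpendicular distance from p to the line through a and b, using only x/y):
from scitbx.matrix import col

def distance_of_point_from_line(p, a, b):
    ap = col(p[0:2]) - col(a[0:2])
    ab = (col(b[0:2]) - col(a[0:2])).normalize()
    along = ap.dot(ab)                 # component of ap along the line
    return (ap - along * ab).length()  # perpendicular component

assert abs(distance_of_point_from_line((0.0, 1.0), (-1.0, 0.0), (1.0, 0.0)) - 1.0) < 1e-9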
def estimate_resolution_limit(reflections, imageset, ice_sel=None,
                              plot_filename=None):
    if ice_sel is None:
        ice_sel = flex.bool(len(reflections), False)

    d_star_sq = flex.pow2(reflections['rlp'].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)

    intensities = reflections['intensity.sum.value']
    variances = reflections['intensity.sum.variance']

    sel = variances > 0
    intensities = intensities.select(sel)
    variances = variances.select(sel)
    ice_sel = ice_sel.select(sel)

    i_over_sigi = intensities/flex.sqrt(variances)
    log_i_over_sigi = flex.log(i_over_sigi)

    fit = flex.linear_regression(
        d_star_sq.select(~ice_sel), log_i_over_sigi.select(~ice_sel))
    m = fit.slope()
    c = fit.y_intercept()

    log_i_sigi_lower = flex.double()
    d_star_sq_lower = flex.double()
    log_i_sigi_upper = flex.double()
    d_star_sq_upper = flex.double()

    binner = binner_equal_population(
        d_star_sq, target_n_per_bin=20, max_slots=20, min_slots=5)

    outliers_all = flex.bool(len(reflections), False)

    low_percentile_limit = 0.1
    upper_percentile_limit = 1-low_percentile_limit
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)

    for i_slot, slot in enumerate(binner.bins):
        sel_all = (d_spacings < slot.d_max) & (d_spacings >= slot.d_min)
        sel = ~(ice_sel) & sel_all
        #sel = ~(ice_sel) & (d_spacings < slot.d_max) & (d_spacings >= slot.d_min)
        #print "%.2f" %(sel.count(True)/sel_all.count(True))

        if sel.count(True) == 0:
            #outliers_all.set_selected(sel_all & ice_sel, True)
            continue
        #if i_slot > i_slot_max:
            #break
        #else:
            #continue

        outliers = wilson_outliers(
            reflections.select(sel_all), ice_sel=ice_sel.select(sel_all))
        #print "rejecting %d wilson outliers" %outliers.count(True)
        outliers_all.set_selected(sel_all, outliers)

        #if sel.count(True)/sel_all.count(True) < 0.25:
            #outliers_all.set_selected(sel_all & ice_sel, True)

        #from scitbx.math import median_statistics
        #intensities_sel = intensities.select(sel)
        #stats = median_statistics(intensities_sel)
        #z_score = 0.6745 * (intensities_sel - stats.median)/stats.median_absolute_deviation
        #outliers = z_score > 3.5
        #perm = flex.sort_permutation(intensities_sel)
        ##print ' '.join('%.2f' %v for v in intensities_sel.select(perm))
        ##print ' '.join('%.2f' %v for v in z_score.select(perm))
        ##print

        isel = sel_all.iselection().select(
            ~(outliers) & ~(ice_sel).select(sel_all))

        log_i_over_sigi_sel = log_i_over_sigi.select(isel)
        d_star_sq_sel = d_star_sq.select(isel)

        perm = flex.sort_permutation(log_i_over_sigi_sel)
        i_lower = perm[int(math.floor(low_percentile_limit * len(perm)))]
        i_upper = perm[int(math.floor(upper_percentile_limit * len(perm)))]
        log_i_sigi_lower.append(log_i_over_sigi_sel[i_lower])
        log_i_sigi_upper.append(log_i_over_sigi_sel[i_upper])
        d_star_sq_upper.append(d_star_sq_sel[i_lower])
        d_star_sq_lower.append(d_star_sq_sel[i_upper])

    fit_upper = flex.linear_regression(d_star_sq_upper, log_i_sigi_upper)
    m_upper = fit_upper.slope()
    c_upper = fit_upper.y_intercept()
    fit_lower = flex.linear_regression(d_star_sq_lower, log_i_sigi_lower)
    m_lower = fit_lower.slope()
    c_lower = fit_lower.y_intercept()

    #fit_upper.show_summary()
    #fit_lower.show_summary()

    if m_upper == m_lower:
        intersection = (-1, -1)
        resolution_estimate = -1
        inside = flex.bool(len(d_star_sq), False)
    else:
        # http://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Given_the_equations_of_the_lines
        intersection = (
            (c_lower-c_upper)/(m_upper-m_lower),
            (m_upper*c_lower-m_lower*c_upper)/(m_upper-m_lower))

        a = m_upper
        c_ = c_upper
        b = m_lower
        d = c_lower
        assert intersection == ((d-c_)/(a-b), (a*d-b*c_)/(a-b))

        #inside = points_inside_envelope(
            #d_star_sq, log_i_over_sigi, m_upper, c_upper, m_lower, c_lower)

        inside = points_below_line(d_star_sq, log_i_over_sigi, m_upper, c_upper)
        inside = inside & ~outliers_all

        if inside.count(True) > 0:
            d_star_sq_estimate = flex.max(d_star_sq.select(inside))
            #d_star_sq_estimate = intersection[0]
            resolution_estimate = uctbx.d_star_sq_as_d(d_star_sq_estimate)
        else:
            resolution_estimate = -1

    #resolution_estimate = max(resolution_estimate, flex.min(d_spacings))

    if plot_filename is not None:
        if pyplot is None:
            raise Sorry("matplotlib must be installed to generate a plot.")
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(d_star_sq, log_i_over_sigi, marker='+')
        ax.scatter(d_star_sq.select(inside), log_i_over_sigi.select(inside),
                   marker='+', color='green')
        ax.scatter(d_star_sq.select(ice_sel), log_i_over_sigi.select(ice_sel),
                   marker='+', color='black')
        ax.scatter(d_star_sq.select(outliers_all),
                   log_i_over_sigi.select(outliers_all),
                   marker='+', color='grey')
        ax.scatter(d_star_sq_upper, log_i_sigi_upper, marker='+', color='red')
        ax.scatter(d_star_sq_lower, log_i_sigi_lower, marker='+', color='red')

        if (intersection[0] <= ax.get_xlim()[1]
                and intersection[1] <= ax.get_ylim()[1]):
            ax.scatter([intersection[0]], [intersection[1]], marker='x', s=50,
                       color='b')
        #ax.hexbin(d_star_sq, log_i_over_sigi, gridsize=30)
        xlim = pyplot.xlim()
        ax.plot(xlim, [(m * x + c) for x in xlim])
        ax.plot(xlim, [(m_upper * x + c_upper) for x in xlim], color='red')
        ax.plot(xlim, [(m_lower * x + c_lower) for x in xlim], color='red')
        ax.set_xlabel('d_star_sq')
        ax.set_ylabel('ln(I/sigI)')
        ax.set_xlim((max(-xlim[1], -0.05), xlim[1]))
        ax.set_ylim((0, ax.get_ylim()[1]))

        for i_slot, slot in enumerate(binner.bins):
            if i_slot == 0:
                ax.vlines(uctbx.d_as_d_star_sq(slot.d_max), 0,
                          ax.get_ylim()[1], linestyle='dotted', color='grey')
            ax.vlines(uctbx.d_as_d_star_sq(slot.d_min), 0,
                      ax.get_ylim()[1], linestyle='dotted', color='grey')

        ax_ = ax.twiny()  # ax_ is responsible for the "top" axis and "right" axis
        xticks = ax.get_xticks()
        xlim = ax.get_xlim()
        xticks_d = [
            uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks
        ]
        xticks_ = [ds2/(xlim[1]-xlim[0]) for ds2 in xticks]
        ax_.set_xticks(xticks)
        ax_.set_xlim(ax.get_xlim())
        ax_.set_xlabel(r"Resolution ($\AA$)")
        ax_.set_xticklabels(["%.1f" % d for d in xticks_d])
        #pyplot.show()
        pyplot.savefig(plot_filename)
        pyplot.close()

    return resolution_estimate
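# points_below_line is referenced above but defined elsewhere; a minimal
# sketch of its assumed behaviour (a flex.bool mask of points strictly below
# the line y = m*x + c):
from scitbx.array_family import flex

def points_below_line(x, y, m, c):
    return y < (m * x + c)

x = flex.double([0.0, 1.0])
y = flex.double([0.5, 3.0])
print(list(points_below_line(x, y, 1.0, 1.0)))  # [True, False]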
def _create_summation_matrix(self):
    """Create a summation matrix to allow sums into intensity bins.

    This routine attempts to bin into bins equally spaced in log(intensity),
    to give a representative sample across all intensities. To avoid
    undersampling, it is required that there are at least 100 reflections per
    intensity bin unless there are very few reflections."""
    n = self.Ih_table.size
    self.binning_info["n_reflections"] = n
    summation_matrix = sparse.matrix(n, self.n_bins)
    Ih = self.Ih_table.Ih_values * self.Ih_table.inverse_scale_factors
    size_order = flex.sort_permutation(Ih, reverse=True)
    Imax = max(Ih)
    Imin = max(1.0, min(Ih))  # avoid log issues
    spacing = (log(Imax) - log(Imin)) / float(self.n_bins)
    boundaries = [Imax] + [
        exp(log(Imax) - (i * spacing)) for i in range(1, self.n_bins + 1)
    ]
    boundaries[-1] = min(Ih) - 0.01
    self.binning_info["bin_boundaries"] = boundaries
    self.binning_info["refl_per_bin"] = flex.double()

    n_cumul = 0
    if Ih.size() > 100 * self.min_reflections_required:
        self.min_reflections_required = int(Ih.size() / 100.0)
    min_per_bin = min(self.min_reflections_required,
                      int(n / (3.0 * self.n_bins)))
    for i in range(len(boundaries) - 1):
        maximum = boundaries[i]
        minimum = boundaries[i + 1]
        sel1 = Ih <= maximum
        sel2 = Ih > minimum
        sel = sel1 & sel2
        isel = sel.iselection()
        n_in_bin = isel.size()
        if n_in_bin < min_per_bin:  # need more in this bin
            m = n_cumul + min_per_bin
            if m < n:  # still some refl left to use
                idx = size_order[m]
                intensity = Ih[idx]
                boundaries[i + 1] = intensity
                minimum = boundaries[i + 1]
                sel = sel1 & (Ih > minimum)
                isel = sel.iselection()
                n_in_bin = isel.size()
        self.binning_info["refl_per_bin"].append(n_in_bin)
        for j in isel:
            summation_matrix[j, i] = 1
        n_cumul += n_in_bin

    cols_to_del = []
    for i, col in enumerate(summation_matrix.cols()):
        if col.non_zeroes < min_per_bin - 5:
            cols_to_del.append(i)
    n_new_cols = summation_matrix.n_cols - len(cols_to_del)
    if n_new_cols == self.n_bins:
        for i in range(len(boundaries) - 1):
            maximum = boundaries[i]
            minimum = boundaries[i + 1]
            sel1 = Ih <= maximum
            sel2 = Ih > minimum
            sel = sel1 & sel2
            m = flex.mean(Ih.select(sel))
            self.binning_info["mean_intensities"].append(m)
        return summation_matrix

    new_sum_matrix = sparse.matrix(summation_matrix.n_rows, n_new_cols)
    next_col = 0
    refl_per_bin = flex.double()
    new_bounds = []
    for i, col in enumerate(summation_matrix.cols()):
        if i not in cols_to_del:
            new_sum_matrix[:, next_col] = col
            next_col += 1
            new_bounds.append(boundaries[i])
            refl_per_bin.append(self.binning_info["refl_per_bin"][i])
    self.binning_info["refl_per_bin"] = refl_per_bin
    new_bounds.append(boundaries[-1])
    self.binning_info["bin_boundaries"] = new_bounds
    for i in range(len(new_bounds) - 1):
        maximum = new_bounds[i]
        minimum = new_bounds[i + 1]
        sel1 = Ih <= maximum
        sel2 = Ih > minimum
        sel = sel1 & sel2
        m = flex.mean(Ih.select(sel))
        self.binning_info["mean_intensities"].append(m)
    return new_sum_matrix
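# The bin boundaries above are equally spaced in log(intensity). A standalone
# restatement of just that step (plain math module; toy Imax/Imin; the helper
# name log_spaced_boundaries is ours):
from math import exp, log

def log_spaced_boundaries(Imax, Imin, n_bins):
    spacing = (log(Imax) - log(Imin)) / float(n_bins)
    return [Imax] + [exp(log(Imax) - i * spacing) for i in range(1, n_bins + 1)]

print(log_spaced_boundaries(1000.0, 1.0, 3))  # approx [1000.0, 100.0, 10.0, 1.0]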
def read_mtzfile(filename, batch_offset=None):
    """Read the mtz file"""
    miller_arrays = mtz.object(file_name=filename).as_miller_arrays(
        merge_equivalents=False)

    # Select the desired columns
    intensities = None
    batches = None
    for array in miller_arrays:
        if array.info().labels == ["I", "SIGI"]:
            intensities = array
        if array.info().labels == ["BATCH"]:
            batches = array
    if not intensities:
        raise KeyError(
            "Intensities not found in mtz file, expected labels I, SIGI")
    if not batches:
        raise KeyError("Batch values not found")
    if batches.data().size() != intensities.data().size():
        raise ValueError("Batch and intensity array sizes do not match")

    # Get the unit cell and space group
    unit_cell = intensities.unit_cell()
    space_group = intensities.crystal_symmetry().space_group()

    # The reflection data
    table = flex.reflection_table()
    table["miller_index"] = intensities.indices()
    table["intensity"] = intensities.data()
    table["variance"] = flex.pow2(intensities.sigmas())

    # Create unit cell list
    zeroed_batches = batches.data() - flex.min(batches.data())
    dataset = flex.int(table.size(), 0)
    sorted_batches = flex.sorted(zeroed_batches)
    sel_perm = flex.sort_permutation(zeroed_batches)

    if not batch_offset:
        previous = 0
        potential_batch_offsets = flex.double()
        for i, b in enumerate(sorted_batches):
            if b - previous > 1:
                potential_batch_offsets.append(b - previous)
            previous = b
        potential = flex.sorted(potential_batch_offsets)
        # potential is a list of low numbers (where images may not have any
        # spots) and larger numbers between batches.
        if len(potential) == 1:
            batch_offset = potential[0]
            logger.info(
                """
Using a batch offset of %s to split datasets.
Batch offset can be specified with mtz.batch_offset=
""",
                batch_offset,
            )
        elif len(potential) > 1:
            diffs = flex.double([
                potential[i + 1] - p for i, p in enumerate(potential[:-1])
            ])
            i = flex.sort_permutation(diffs)[-1]
            batch_offset = int(potential[i + 1] - (0.2 * diffs[i]))
            logger.info(
                """
Using an approximate batch offset of %s to split datasets.
Batch offset can be specified with mtz.batch_offset=
""",
                batch_offset,
            )
        else:
            batch_offset = 1

    previous = 0
    dataset_no = 0
    for i, b in enumerate(sorted_batches):
        if b - previous > batch_offset - 1:
            dataset_no += 1
        dataset[i] = dataset_no
        previous = b

    table["dataset"] = flex.int(table.size(), 0)
    table["dataset"].set_selected(sel_perm, dataset)

    return table, unit_cell, space_group
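# A toy illustration of the gap-based dataset splitting above (plain Python,
# mirroring the flex logic): batches [0, 1, 2, 50, 51] with an offset of 10
# should split into two datasets.
sorted_batches = [0, 1, 2, 50, 51]
batch_offset = 10
previous = 0
dataset_no = 0
dataset = []
for b in sorted_batches:
    if b - previous > batch_offset - 1:
        dataset_no += 1
    dataset.append(dataset_no)
    previous = b
print(dataset)  # [0, 0, 0, 1, 1]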
def run_with_preparsed(self, params, options): """Run combine_experiments, but allow passing in of parameters""" # Try to load the models and data if not params.input.experiments: print("No Experiments found in the input") self.parser.print_help() return if not params.input.reflections: print("No reflection data found in the input") self.parser.print_help() return if len(params.input.reflections) != len(params.input.experiments): sys.exit( "The number of input reflections files does not match the " "number of input experiments") flat_exps = flatten_experiments(params.input.experiments) ref_beam = params.reference_from_experiment.beam ref_goniometer = params.reference_from_experiment.goniometer ref_scan = params.reference_from_experiment.scan ref_crystal = params.reference_from_experiment.crystal ref_detector = params.reference_from_experiment.detector if ref_beam is not None: try: ref_beam = flat_exps[ref_beam].beam except IndexError: sys.exit(f"{ref_beam} is not a valid experiment ID") if ref_goniometer is not None: try: ref_goniometer = flat_exps[ref_goniometer].goniometer except IndexError: sys.exit(f"{ref_goniometer} is not a valid experiment ID") if ref_scan is not None: try: ref_scan = flat_exps[ref_scan].scan except IndexError: sys.exit(f"{ref_scan} is not a valid experiment ID") if ref_crystal is not None: try: ref_crystal = flat_exps[ref_crystal].crystal except IndexError: sys.exit(f"{ref_crystal} is not a valid experiment ID") if ref_detector is not None: assert not params.reference_from_experiment.average_detector try: ref_detector = flat_exps[ref_detector].detector except IndexError: sys.exit(f"{ref_detector} is not a valid experiment ID") elif params.reference_from_experiment.average_detector: # Average all of the detectors together def average_detectors(target, panelgroups, depth): # Recursive function to do the averaging if (params.reference_from_experiment.average_hierarchy_level is None or depth == params.reference_from_experiment. 
average_hierarchy_level):
        n = len(panelgroups)
        sum_fast = matrix.col((0.0, 0.0, 0.0))
        sum_slow = matrix.col((0.0, 0.0, 0.0))
        sum_ori = matrix.col((0.0, 0.0, 0.0))

        # Average the d matrix vectors
        for pg in panelgroups:
            sum_fast += matrix.col(pg.get_local_fast_axis())
            sum_slow += matrix.col(pg.get_local_slow_axis())
            sum_ori += matrix.col(pg.get_local_origin())
        sum_fast /= n
        sum_slow /= n
        sum_ori /= n

        # Re-orthogonalize the slow and fast vectors by rotating around their cross product
        c = sum_fast.cross(sum_slow)
        a = sum_fast.angle(sum_slow, deg=True) / 2
        sum_fast = sum_fast.rotate_around_origin(c, a - 45, deg=True)
        sum_slow = sum_slow.rotate_around_origin(c, -(a - 45), deg=True)

        target.set_local_frame(sum_fast, sum_slow, sum_ori)

    if target.is_group():
        # Recurse
        for i, target_pg in enumerate(target):
            average_detectors(target_pg, [pg[i] for pg in panelgroups],
                              depth + 1)

ref_detector = flat_exps[0].detector
average_detectors(ref_detector.hierarchy(),
                  [e.detector.hierarchy() for e in flat_exps], 0)

combine = CombineWithReference(
    beam=ref_beam,
    goniometer=ref_goniometer,
    scan=ref_scan,
    crystal=ref_crystal,
    detector=ref_detector,
    params=params,
)

# set up global experiments and reflections lists
reflections = flex.reflection_table()
global_id = 0
skipped_expts_min_refl = 0
skipped_expts_max_refl = 0
experiments = ExperimentList()

# loop through the input, building up the global lists
nrefs_per_exp = []
for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                    params.input.experiments):
    refs = ref_wrapper.data
    exps = exp_wrapper.data

    # Record the initial mapping of ids for updating later
    ids_map = dict(refs.experiment_identifiers())
    # Keep track of the mapping of imageset_ids old->new within this experimentlist
    imageset_result_map = {}

    for k in refs.experiment_identifiers().keys():
        del refs.experiment_identifiers()[k]

    for i, exp in enumerate(exps):
        sel = refs["id"] == i
        sub_ref = refs.select(sel)
        n_sub_ref = len(sub_ref)
        if (params.output.min_reflections_per_experiment is not None
                and n_sub_ref < params.output.min_reflections_per_experiment):
            skipped_expts_min_refl += 1
            continue
        if (params.output.max_reflections_per_experiment is not None
                and n_sub_ref > params.output.max_reflections_per_experiment):
            skipped_expts_max_refl += 1
            continue

        nrefs_per_exp.append(n_sub_ref)
        sub_ref["id"] = flex.int(len(sub_ref), global_id)

        # now update identifiers, if set
        if i in ids_map:
            sub_ref.experiment_identifiers()[global_id] = ids_map[i]
        if params.output.delete_shoeboxes and "shoebox" in sub_ref:
            del sub_ref["shoebox"]

        try:
            experiments.append(combine(exp))
        except ComparisonError as e:
            # When the tolerance checks fail, give a useful error message
            (path, index) = find_experiment_in(exp, params.input.experiments)
            sys.exit(
                "Model didn't match reference within required tolerance for experiment {} in {}:"
                "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                .format(index, path, str(e)))

        # Rewrite imageset_id, if the experiment has an imageset
        if exp.imageset and "imageset_id" in sub_ref:
            # Get the index of the imageset for this experiment and record how it changed
            new_imageset_id = experiments.imagesets().index(
                experiments[-1].imageset)
            old_imageset_id = exps.imagesets().index(exp.imageset)
            imageset_result_map[old_imageset_id] = new_imageset_id

            # Check for inconsistent imageset_id indices, and leave them
            # unchanged (with a warning) if they are wrong
            if len(set(sub_ref["imageset_id"])) != 1:
                logger.warning(
                    "Warning: Experiment %d reflections appear to have come from multiple imagesets - output may be incorrect",
                    i,
                )
            else:
                sub_ref["imageset_id"] = flex.int(len(sub_ref),
                                                  new_imageset_id)

        reflections.extend(sub_ref)
        global_id += 1

    # Include unindexed reflections, if we can safely remap their imagesets
    if "imageset_id" in reflections:
        unindexed_refs = refs.select(refs["id"] == -1)
        for old_id in set(unindexed_refs["imageset_id"]):
            subs = unindexed_refs.select(
                unindexed_refs["imageset_id"] == old_id)
            subs["imageset_id"] = flex.int(len(subs),
                                           imageset_result_map[old_id])
            reflections.extend(subs)

if (params.output.min_reflections_per_experiment is not None
        and skipped_expts_min_refl > 0):
    print("Removed {} experiments with fewer than {} reflections".format(
        skipped_expts_min_refl,
        params.output.min_reflections_per_experiment))
if (params.output.max_reflections_per_experiment is not None
        and skipped_expts_max_refl > 0):
    print("Removed {} experiments with more than {} reflections".format(
        skipped_expts_max_refl,
        params.output.max_reflections_per_experiment))

# print number of reflections per experiment
header = ["Experiment", "Number of reflections"]
rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
print(tabulate(rows, header))

# save a random subset if requested
if (params.output.n_subset is not None
        and len(experiments) > params.output.n_subset):
    subset_exp = ExperimentList()
    subset_refls = flex.reflection_table()
    if params.output.n_subset_method == "random":
        n_picked = 0
        indices = list(range(len(experiments)))
        if reflections.experiment_identifiers().keys():
            indices_to_sel = []
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                indices_to_sel.append(idx)
                n_picked += 1
            # make sure to select in order
            for idx in sorted(indices_to_sel):
                subset_exp.append(experiments[idx])
            subset_refls = reflections.select(subset_exp)
            subset_refls.reset_ids()
        else:
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
        print("Selecting a random subset of {} experiments out of {} total."
              .format(params.output.n_subset, len(experiments)))
    elif params.output.n_subset_method == "n_refl":
        if params.output.n_refl_panel_list is None:
            refls_subset = reflections
        else:
            sel = flex.bool(len(reflections), False)
            for p in params.output.n_refl_panel_list:
                sel |= reflections["panel"] == p
            refls_subset = reflections.select(sel)
        refl_counts = flex.int()
        for expt_id in range(len(experiments)):
            refl_counts.append((refls_subset["id"] == expt_id).count(True))
        sort_order = flex.sort_permutation(refl_counts, reverse=True)
        if reflections.experiment_identifiers().keys():
            for idx in sorted(sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
            subset_refls = reflections.select(subset_exp)
            subset_refls.reset_ids()
        else:
            for expt_id, idx in enumerate(
                    sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
        print(
            "Selecting a subset of {} experiments with highest number of reflections out of {} total."
            .format(params.output.n_subset, len(experiments)))
    elif params.output.n_subset_method == "significance_filter":
        params.output.significance_filter.enable = True
        sig_filter = SignificanceFilter(params.output)
        refls_subset = sig_filter(experiments, reflections)
        refl_counts = flex.int()
        for expt_id in range(len(experiments)):
            refl_counts.append((refls_subset["id"] == expt_id).count(True))
        sort_order = flex.sort_permutation(refl_counts, reverse=True)
        if reflections.experiment_identifiers().keys():
            for idx in sorted(sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
            subset_refls = reflections.select(subset_exp)
            subset_refls.reset_ids()
        else:
            for expt_id, idx in enumerate(
                    sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)

    experiments = subset_exp
    reflections = subset_refls

def save_in_batches(experiments, reflections, exp_name, refl_name,
                    batch_size=1000):
    for i, indices in enumerate(
            splitit(list(range(len(experiments))),
                    (len(experiments) // batch_size) + 1)):
        batch_expts = ExperimentList()
        batch_refls = flex.reflection_table()
        if reflections.experiment_identifiers().keys():
            for sub_idx in indices:
                batch_expts.append(experiments[sub_idx])
            batch_refls = reflections.select(batch_expts)
            batch_refls.reset_ids()
        else:
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(reflections["id"] == sub_idx)
                sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
        exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
        ref_filename = os.path.splitext(refl_name)[0] + "_%03d.refl" % i
        self._save_output(batch_expts, batch_refls, exp_filename,
                          ref_filename)

def combine_in_clusters(experiments_l, reflections_l, exp_name, refl_name,
                        end_count):
    result = []
    for cluster, experiment in enumerate(experiments_l):
        cluster_expts = ExperimentList()
        cluster_refls = flex.reflection_table()
        for i, expts in enumerate(experiment):
            refls = reflections_l[cluster][i]
            if refls.experiment_identifiers().keys():
                identifier = refls.experiment_identifiers().values()[0]
                id_val = refls.experiment_identifiers().keys()[0]
                del refls.experiment_identifiers()[id_val]
                refls["id"] = flex.int(len(refls), i)
                refls.experiment_identifiers()[i] = identifier
                refls.assert_experiment_identifiers_are_consistent(
                    experiment[i:i + 1])
            else:
                refls["id"] = flex.int(len(refls), i)
            cluster_expts.append(expts)
            cluster_refls.extend(refls)
        exp_filename = os.path.splitext(exp_name)[0] + (
            "_cluster%d.expt" % (end_count - cluster))
        ref_filename = os.path.splitext(refl_name)[0] + (
            "_cluster%d.refl" % (end_count - cluster))
        result.append(
            (cluster_expts, cluster_refls, exp_filename, ref_filename))
    return result

# cluster the resulting experiments if requested
if params.clustering.use:
    clustered = Cluster(
        experiments,
        reflections,
        dendrogram=params.clustering.dendrogram,
        threshold=params.clustering.threshold,
        n_max=params.clustering.max_crystals,
    )
    n_clusters = len(clustered.clustered_frames)

    def not_too_many(keeps):
        if params.clustering.max_clusters is not None:
            return len(keeps) < params.clustering.max_clusters
        return True

    keep_frames = []
    sorted_keys = sorted(clustered.clustered_frames.keys())
    while len(clustered.clustered_frames) > 0 and not_too_many(keep_frames):
        keep_frames.append(
            clustered.clustered_frames.pop(sorted_keys.pop(-1)))

    if params.clustering.exclude_single_crystal_clusters:
        keep_frames = [k for k in keep_frames if len(k) > 1]

    clustered_experiments = [
        ExperimentList([f.experiment for f in frame_cluster])
        for frame_cluster in keep_frames
    ]
    clustered_reflections = [[f.reflections for f in frame_cluster]
                             for frame_cluster in keep_frames]

    list_of_combined = combine_in_clusters(
        clustered_experiments,
        clustered_reflections,
        params.output.experiments_filename,
        params.output.reflections_filename,
        n_clusters,
    )
    for saveable_tuple in list_of_combined:
        if params.output.max_batch_size is None:
            self._save_output(*saveable_tuple)
        else:
            save_in_batches(*saveable_tuple,
                            batch_size=params.output.max_batch_size)
else:
    if params.output.max_batch_size is None:
        self._save_output(
            experiments,
            reflections,
            params.output.experiments_filename,
            params.output.reflections_filename,
        )
    else:
        save_in_batches(
            experiments,
            reflections,
            params.output.experiments_filename,
            params.output.reflections_filename,
            batch_size=params.output.max_batch_size,
        )
return
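# Note: `save_in_batches` above relies on a `splitit` helper that is not
# defined in this excerpt; it is assumed to split a list of indices into a
# given number of roughly equal contiguous chunks. A minimal sketch under
# that assumption (the name `splitit_sketch` is ours, not the original
# helper) might look like:


def splitit_sketch(lst, n):
    """Split lst into n contiguous chunks whose lengths differ by at most one."""
    quotient, remainder = divmod(len(lst), n)
    chunks = []
    start = 0
    for i in range(n):
        # the first `remainder` chunks receive one extra element
        end = start + quotient + (1 if i < remainder else 0)
        chunks.append(lst[start:end])
        start = end
    return chunks


# e.g. splitit_sketch(list(range(7)), 3) -> [[0, 1, 2], [3, 4], [5, 6]]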
def run_once(directory):
    from dxtbx.serialize import load

    sweep_dir = os.path.basename(directory)
    print(sweep_dir)

    datablock_name = os.path.join(directory, "datablock.json")
    if not os.path.exists(datablock_name):
        # this is what xia2 calls it:
        datablock_name = os.path.join(directory, "datablock_import.json")
    strong_spots_name = os.path.join(directory, "strong.pickle")
    experiments_name = os.path.join(directory, "experiments.json")
    indexed_spots_name = os.path.join(directory, "indexed.pickle")
    unindexed_spots_name = os.path.join(directory, "unindexed.pickle")

    if not (os.path.exists(datablock_name)
            and os.path.exists(strong_spots_name)):
        return

    datablock = load.datablock(datablock_name)
    assert len(datablock) == 1
    if len(datablock[0].extract_sweeps()) == 0:
        print("Skipping %s" % directory)
        return
    sweep = datablock[0].extract_sweeps()[0]
    template = sweep.get_template()

    strong_spots = easy_pickle.load(strong_spots_name)
    n_strong_spots = len(strong_spots)
    if os.path.exists(experiments_name):
        experiments = load.experiment_list(experiments_name)
        n_indexed_lattices = len(experiments)
    else:
        experiments = None
        n_indexed_lattices = 0

    g = glob.glob(os.path.join(directory, "xds*", "run_2", "INTEGRATE.HKL"))
    n_integrated_lattices = len(g)

    if os.path.exists(indexed_spots_name):
        indexed_spots = easy_pickle.load(indexed_spots_name)
    else:
        indexed_spots = None
        g = glob.glob(os.path.join(directory, "indexed_*.pickle"))
        if len(g):
            for path in g:
                if indexed_spots is None:
                    indexed_spots = easy_pickle.load(path)
                else:
                    indexed_spots.extend(easy_pickle.load(path))

    if os.path.exists(unindexed_spots_name):
        unindexed_spots = easy_pickle.load(unindexed_spots_name)
        n_unindexed_spots = len(unindexed_spots)
    else:
        n_unindexed_spots = 0

    # calculate estimated d_min for the sweep based on the 95th percentile
    from dials.algorithms.indexing import indexer

    detector = sweep.get_detector()
    scan = sweep.get_scan()
    beam = sweep.get_beam()
    goniometer = sweep.get_goniometer()
    if len(strong_spots) == 0:
        d_strong_spots_99th_percentile = 0
        d_strong_spots_95th_percentile = 0
        d_strong_spots_50th_percentile = 0
        n_strong_spots_dmin_4 = 0
    else:
        spots_mm = indexer.Indexer.map_spots_pixel_to_mm_rad(
            strong_spots, detector, scan)
        indexer.Indexer.map_centroids_to_reciprocal_space(
            spots_mm, detector, beam, goniometer)
        d_spacings = 1 / spots_mm["rlp"].norms()
        perm = flex.sort_permutation(d_spacings, reverse=True)
        d_spacings_sorted = d_spacings.select(perm)
        percentile_99th = int(math.floor(0.99 * len(d_spacings)))
        percentile_95th = int(math.floor(0.95 * len(d_spacings)))
        percentile_50th = int(math.floor(0.5 * len(d_spacings)))
        d_strong_spots_99th_percentile = d_spacings_sorted[percentile_99th]
        d_strong_spots_95th_percentile = d_spacings_sorted[percentile_95th]
        d_strong_spots_50th_percentile = d_spacings_sorted[percentile_50th]
        n_strong_spots_dmin_4 = (d_spacings >= 4).count(True)

    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()
    sweep_dir_cryst = flex.std_string()
    if experiments is not None:
        for i, experiment in enumerate(experiments):
            sweep_dir_cryst.append(sweep_dir)
            crystal_model = experiment.crystal
            unit_cell = crystal_model.get_unit_cell()
            space_group = crystal_model.get_space_group()
            crystal_symmetry = crystal.symmetry(unit_cell=unit_cell,
                                                space_group=space_group)
            cb_op_reference_setting = (
                crystal_symmetry.change_of_basis_op_to_reference_setting())
            crystal_symmetry_reference_setting = crystal_symmetry.change_basis(
                cb_op_reference_setting)
            cell_params.append(
                crystal_symmetry_reference_setting.unit_cell().parameters())
            spots_mm = indexed_spots.select(indexed_spots["id"] == i)
            n_indexed.append(len(spots_mm))
            if len(spots_mm) == 0:
                d_min_indexed.append(0)
            else:
                indexer.Indexer.map_centroids_to_reciprocal_space(
                    spots_mm, detector, beam, goniometer)
                d_spacings = 1 / spots_mm["rlp"].norms()
                perm = flex.sort_permutation(d_spacings, reverse=True)
                d_min_indexed.append(d_spacings[perm[-1]])
            try:
                rmsds.append(get_rmsds_obs_pred(spots_mm, experiment))
            except Exception as e:
                print(e)
                rmsds.append((-1, -1, -1))
                continue

    return group_args(
        sweep_dir=sweep_dir,
        template=template,
        n_strong_spots=n_strong_spots,
        n_strong_spots_dmin_4=n_strong_spots_dmin_4,
        n_unindexed_spots=n_unindexed_spots,
        n_indexed_lattices=n_indexed_lattices,
        n_integrated_lattices=n_integrated_lattices,
        d_strong_spots_50th_percentile=d_strong_spots_50th_percentile,
        d_strong_spots_95th_percentile=d_strong_spots_95th_percentile,
        d_strong_spots_99th_percentile=d_strong_spots_99th_percentile,
        cell_params=cell_params,
        n_indexed=n_indexed,
        d_min_indexed=d_min_indexed,
        rmsds=rmsds,
        sweep_dir_cryst=sweep_dir_cryst,
    )
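# A minimal driver sketch for run_once, assuming sweep directories sit
# directly under a root directory passed on the command line; the directory
# layout and the summary columns printed here are illustrative assumptions,
# not the original harness:

if __name__ == "__main__":
    import sys

    results = []
    for directory in sorted(glob.glob(os.path.join(sys.argv[1], "*"))):
        if not os.path.isdir(directory):
            continue
        result = run_once(directory)
        if result is not None:
            results.append(result)
    # summarise one line per sweep, reading attributes off the returned group_args
    for r in results:
        print(r.sweep_dir, r.n_strong_spots, r.n_indexed_lattices,
              r.d_strong_spots_95th_percentile)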