def test_ann():
    import random

    from annlib_ext import AnnAdaptor as ann_adaptor
    from cctbx.array_family import flex
    from scitbx import matrix

    reference = flex.double()
    for j in range(3 * 100):
        reference.append(random.random())

    query = flex.double()
    for j in range(3 * 10):
        query.append(random.random())

    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    # work out how far separated the pairs are on average - which should in
    # principle decrease as the number of positions in the reference set
    # increases
    offsets = []
    for j in range(10):
        q = matrix.col([query[3 * j + k] for k in range(3)])
        r = matrix.col([reference[3 * ann.nn[j] + k] for k in range(3)])
        offsets.append((q - r).length())

    return meansd(offsets)
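# hypothetical helper assumed by test_ann() above and by the
# validate_predictions() variants below - a minimal sketch, assuming it
# returns the (mean, sample standard deviation) of a sequence of values
import math

def meansd(values):
    assert len(values) > 1
    mean = sum(values) / len(values)
    var = sum((v - mean) ** 2 for v in values) / (len(values) - 1)
    return mean, math.sqrt(var)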
def compare(xyz_to_hkl, xyz_to_hkl_ref):
    # construct ann to perform search...
    from annlib_ext import AnnAdaptor as ann_adaptor
    from cctbx.array_family import flex

    reference = flex.double()
    xyzs = list(xyz_to_hkl)
    for xyz in xyzs:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    ann = ann_adaptor(data=reference, dim=3, k=1)

    n_correct = 0
    n_wrong = 0

    for xyz in xyz_to_hkl_ref:
        query = flex.double(xyz)
        ann.query(query)
        nnxyz = xyzs[ann.nn[0]]
        if xyz_to_hkl_ref[xyz] == xyz_to_hkl[nnxyz]:
            n_correct += 1
        else:
            n_wrong += 1

    return n_correct, n_wrong
def matcher(reference, moving, params):
    from annlib_ext import AnnAdaptor as ann_adaptor
    from dials.array_family import flex

    rxyz = reference['xyzobs.px.value'].parts()
    mxyz = moving['xyzobs.px.value'].parts()

    rxy = flex.vec2_double(rxyz[0], rxyz[1])
    mxy = flex.vec2_double(mxyz[0], mxyz[1])

    ann = ann_adaptor(rxy.as_double().as_1d(), 2)
    ann.query(mxy.as_double().as_1d())
    distances = flex.sqrt(ann.distances)

    matches = (distances < params.far) & (distances >= params.close)

    xyr = flex.vec2_double()
    xym = flex.vec2_double()

    for j in range(matches.size()):
        if not matches[j]:
            continue
        xym.append(mxy[j])
        xyr.append(rxy[ann.nn[j]])

    # filter outliers - use IQR etc.
    dxy = xym - xyr
    dx, dy = dxy.parts()
    iqx = IQR(dx.select(flex.sort_permutation(dx)))
    iqy = IQR(dy.select(flex.sort_permutation(dy)))

    keep_x = (dx > (iqx[0] - iqx[3])) & (dx < (iqx[2] + iqx[3]))
    keep_y = (dy > (iqy[0] - iqy[3])) & (dy < (iqy[2] + iqy[3]))
    keep = keep_x & keep_y

    xyr = xyr.select(keep)
    xym = xym.select(keep)

    # compute Rt
    R, t, d, n = Rt(xyr, xym)

    # verify matches in original image coordinate system
    import math

    from scitbx import matrix

    _R = matrix.sqr(R)
    rmsd = 0.0
    for j, _xym in enumerate(xym):
        _xymm = _R * _xym + matrix.col(t)
        rmsd += (matrix.col(xyr[j]) - _xymm).length() ** 2
    assert abs(math.sqrt(rmsd / xym.size()) - d) < 1e-6

    return R, t, d, n
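# hypothetical helper assumed by matcher() above and pair_up() below - a
# minimal sketch, assuming that given an already-sorted flex.double it
# returns (Q1, median, Q3, 1.5 * (Q3 - Q1)), so the callers' Tukey fences
# become Q1 - result[3] and Q3 + result[3]
def IQR(sorted_values):
    n = sorted_values.size()
    q1 = sorted_values[n // 4]
    q2 = sorted_values[n // 2]
    q3 = sorted_values[(3 * n) // 4]
    return q1, q2, q3, 1.5 * (q3 - q1)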
def pair_up(reference, moving, params, R0, t0):
    from annlib_ext import AnnAdaptor as ann_adaptor
    from dials.array_family import flex

    rxyz = reference['xyzobs.px.value'].parts()
    mxyz = moving['xyzobs.px.value'].parts()

    # apply R0, t0 before performing matching - so the positions should
    # ideally already be almost right
    rxy = flex.vec2_double(rxyz[0], rxyz[1])
    _mxy = flex.vec2_double(mxyz[0], mxyz[1])

    mxy = flex.vec2_double()
    for __mxy in _mxy:
        mxy.append((R0 * __mxy + t0).elems)

    ann = ann_adaptor(rxy.as_double().as_1d(), 2)
    ann.query(mxy.as_double().as_1d())
    distances = flex.sqrt(ann.distances)

    matches = distances < params.far

    rsel = flex.size_t()
    msel = flex.size_t()
    xyr = flex.vec2_double()
    xym = flex.vec2_double()

    for j in range(matches.size()):
        if not matches[j]:
            continue
        msel.append(j)
        rsel.append(ann.nn[j])
        xym.append(mxy[j])
        xyr.append(rxy[ann.nn[j]])

    # filter outliers - use IQR etc.
    dxy = xym - xyr
    dx, dy = dxy.parts()
    iqx = IQR(dx.select(flex.sort_permutation(dx)))
    iqy = IQR(dy.select(flex.sort_permutation(dy)))

    keep_x = (dx > (iqx[0] - iqx[3])) & (dx < (iqx[2] + iqx[3]))
    keep_y = (dy > (iqy[0] - iqy[3])) & (dy < (iqy[2] + iqy[3]))
    keep = keep_x & keep_y

    return rsel.select(keep), msel.select(keep)
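# a minimal usage sketch, assuming `reference` and `moving` are DIALS
# reflection tables with an 'xyzobs.px.value' column and `params` carries the
# close/far distance cutoffs used above - matcher() gives a coarse R, t which
# pair_up() then uses to produce matched index selections into the two tables
from scitbx import matrix

R, t, d, n = matcher(reference, moving, params)
rsel, msel = pair_up(reference, moving, params, matrix.sqr(R), matrix.col(t))
matched_reference = reference.select(rsel)
matched_moving = moving.select(msel)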
def validate_predictions(integrate_hkl, uc1_2):
    observations = read_integrate_hkl(integrate_hkl)
    predictions = read_uc1_2(uc1_2)

    reference = flex.double()
    query = flex.double()

    for hkl, xyz, isigma in observations:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    for hkl, xyz, isigma in predictions:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    dxs = []
    dys = []
    dzs = []
    ivalues_o = []
    ivalues_p = []

    for j in range(len(predictions)):
        c = ann.nn[j]
        if observations[c][0] == predictions[j][0]:
            dx = observations[c][1][0] - predictions[j][1][0]
            dy = observations[c][1][1] - predictions[j][1][1]
            dz = observations[c][1][2] - predictions[j][1][2]
            dxs.append(dx)
            dys.append(dy)
            dzs.append(dz)
            ivalues_o.append(observations[c][2][0])
            ivalues_p.append(predictions[j][2][0])
            print(observations[c][2][0], predictions[j][2][0])

    return meansd(dxs), meansd(dys), meansd(dzs), cc(ivalues_o, ivalues_p)
def validate_predictions(spot_xds, uc1_2):
    observations = read_spot_xds(spot_xds)
    predictions = read_uc1_2(uc1_2)

    reference = flex.double()
    query = flex.double()

    for hkl, xyz in observations:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    for hkl, xyz in predictions:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    dxs = []
    dys = []
    dzs = []

    for j in range(len(predictions)):
        c = ann.nn[j]
        if observations[c][0] == predictions[j][0]:
            x, y, z = predictions[j][1]
            dx = observations[c][1][0] - predictions[j][1][0]
            dy = observations[c][1][1] - predictions[j][1][1]
            dz = observations[c][1][2] - predictions[j][1][2]
            dxs.append(dx)
            dys.append(dy)
            dzs.append(dz)
            print(x, y, z, dx, dy, dz)

    return meansd(dxs), meansd(dys), meansd(dzs)
def main(mtz_file, xds_integrate_file):
    mos_hkl_xyz_isigi = get_hkl_xyz_isigi(mtz_file)
    print('Read %d observations from %s' % (len(mos_hkl_xyz_isigi), mtz_file))

    xds_hkl_xyz_isigi = read_xds_integrate(xds_integrate_file)
    print('Read %d observations from %s' %
          (len(xds_hkl_xyz_isigi), xds_integrate_file))

    # treat XDS as reference, mosflm as query (arbitrary)
    reference = flex.double()
    query = flex.double()

    for hkl, xyz, isigi in xds_hkl_xyz_isigi:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    for hkl, xyz, isigi in mos_hkl_xyz_isigi:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    i_s_mos = []
    i_s_xds = []

    for j in range(len(mos_hkl_xyz_isigi)):
        c = ann.nn[j]
        if xds_hkl_xyz_isigi[c][0] == mos_hkl_xyz_isigi[j][0]:
            i_s_mos.append(mos_hkl_xyz_isigi[j][2][0])
            i_s_xds.append(xds_hkl_xyz_isigi[c][2][0])

    print('Matched %d observations' % len(i_s_mos))
    print(cc(i_s_mos, i_s_xds))
def run(args): import libtbx.load_env usage = "%s [options]" % libtbx.env.dispatcher_name parser = OptionParser( usage=usage, phil=phil_scope, check_format=False, epilog=help_message ) params, options, args = parser.parse_args( show_diff_phil=True, return_unhandled=True ) assert len(args) == 2 from iotbx.reflection_file_reader import any_reflection_file xyz = [] intensities = [] lp_corrections = [] for f in args: xdet = None ydet = None rot = None i_sigi = None lp = None arrays = any_reflection_file(f).as_miller_arrays(merge_equivalents=False) for ma in arrays: print(ma.info().labels) if ma.info().labels[0] == "XDET": xdet = ma elif ma.info().labels[0] == "YDET": ydet = ma elif ma.info().labels[0] == "ROT": rot = ma elif ma.info().labels == ["I", "SIGI"]: i_sigi = ma elif ma.info().labels[0] == "LP": lp = ma assert [xdet, ydet, rot, i_sigi, lp].count(None) == 0 xyz.append(flex.vec3_double(xdet.data(), ydet.data(), rot.data())) intensities.append(i_sigi) lp_corrections.append(lp) xyz1, xyz2 = xyz xyz2 += (1e-3, 1e-3, 1e-3) intensities1, intensities2 = intensities lp1, lp2 = lp_corrections # Do the nn match from annlib_ext import AnnAdaptor as ann_adaptor ann = ann_adaptor(xyz1.as_double().as_1d(), 3) ann.query(xyz2.as_double().as_1d()) distances = flex.sqrt(ann.distances) matches = distances < 2 # pixels index1 = flex.size_t(list(ann.nn.select(matches))) index2 = flex.size_t(list(matches.iselection())) intensities1 = intensities1.select(index1) intensities2 = intensities2.select(index2) isigi1 = intensities1.data() / intensities1.sigmas() isigi2 = intensities2.data() / intensities2.sigmas() lp1 = lp1.select(index1) lp2 = lp2.select(index2) ##differences = intensities1.data() - intensities2.data() ##sums = intensities1.data() + intensities2.data() # differences = isigi1 - isigi2 # sums = isigi1 + isigi2 # assert sums.all_ne(0) # dos = differences/sums # mean_dos = [] # binner = intensities1.setup_binner_d_star_sq_step(d_star_sq_step=0.01) # d_spacings = intensities1.d_spacings().data() # for i in range(binner.n_bins_used()): # d_max, d_min = binner.bin_d_range(i+1) # bin_sel = (d_spacings > d_min) & (d_spacings <= d_max) # mean_dos.append(flex.mean(dos.select(bin_sel))) # set backend before importing pyplot import matplotlib matplotlib.use("Agg") from matplotlib import pyplot pyplot.scatter(intensities1.data(), intensities2.data(), marker="+", alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0, m), (0, m), c="black") pyplot.savefig("scatter_intensities.png") pyplot.clf() pyplot.scatter(intensities1.sigmas(), intensities2.sigmas(), marker="+", alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0, m), (0, m), c="black") pyplot.savefig("scatter_sigmas.png") pyplot.clf() pyplot.scatter( flex.pow2(intensities1.sigmas()), flex.pow2(intensities2.sigmas()), marker="+", alpha=0.5, ) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0, m), (0, m), c="black") pyplot.savefig("scatter_variances.png") pyplot.clf() pyplot.scatter(isigi1, isigi2, marker="+", alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0, m), (0, m), c="black") pyplot.savefig("scatter_i_sig_i.png") pyplot.clf() pyplot.scatter(lp1.data(), lp2.data(), marker="+", alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0, m), (0, m)) pyplot.savefig("scatter_LP.png") pyplot.clf() # from cctbx import uctbx # pyplot.scatter(uctbx.d_star_sq_as_d(binner.bin_centers(2)), mean_dos) # pyplot.savefig('mean_dos.png') # pyplot.clf() return
def main(args):
    reflections = load_data(args[0])
    aimless_scaled_file = args[1]

    dials_hkl_xyz_isigi = read_dials_scaled(reflections)
    print("Read %d observations from %s" % (len(dials_hkl_xyz_isigi), args[0]))

    aimless_hkl_xyz_isigi = read_aimless(aimless_scaled_file)
    print("Read %d observations from %s" %
          (len(aimless_hkl_xyz_isigi), aimless_scaled_file))

    # treat aimless as reference, dials as query - extract xyz positions
    reference = flex.double()
    query = flex.double()

    for hkl, xyz, isigi, scale in dials_hkl_xyz_isigi:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    for hkl, xyz, isigi, scale in aimless_hkl_xyz_isigi:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    i_s_dials = []
    i_s_aimless = []

    # match the reflections, appending the inverse scale factors to paired lists
    for j in range(len(dials_hkl_xyz_isigi)):
        c = ann.nn[j]
        if aimless_hkl_xyz_isigi[c][0] == dials_hkl_xyz_isigi[j][0]:
            i_s_dials.append(dials_hkl_xyz_isigi[j][3])
            i_s_aimless.append(aimless_hkl_xyz_isigi[c][3])

    # calculate correlation between the scale factors
    print("Matched %d observations" % len(i_s_dials))
    correlation_coefficient = cc(i_s_dials, i_s_aimless)
    R_factor = R(i_s_dials, i_s_aimless)
    print("CC: %.6f" % correlation_coefficient)
    print("R: %.3f" % R_factor)

    import matplotlib.pyplot as plt

    y_ideal = [x for x in i_s_dials]

    plt.figure(figsize=(10, 7))
    plt.scatter(i_s_dials, i_s_aimless, s=0.1)
    plt.plot(i_s_dials, y_ideal, color="r")
    plt.xlabel("Inverse scale factor in DIALS")
    plt.ylabel("Inverse scale factor in aimless")
    plt.gca().set_aspect("equal")
    plt.title("""Comparison of inverse scale factors from aimless and
dials.aimless_scaling, CC = %.5f, R = %.3f""" % (correlation_coefficient, R_factor))
    plt.savefig("Aimless_DIALS_comparison.png")
    plt.show()

    return correlation_coefficient, R_factor
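# hypothetical sketches of the cc() and R() helpers assumed by main() above
# and several other snippets - a Pearson correlation coefficient and a simple
# R-factor; note the compare_chunks() functions further down assume a variant
# of R() that also returns a scale factor
import math

def cc(a, b):
    # Pearson correlation coefficient between two equal-length sequences
    assert len(a) == len(b) and len(a) > 1
    ma = sum(a) / len(a)
    mb = sum(b) / len(b)
    sab = sum((x - ma) * (y - mb) for x, y in zip(a, b))
    sa = math.sqrt(sum((x - ma) ** 2 for x in a))
    sb = math.sqrt(sum((y - mb) ** 2 for y in b))
    return sab / (sa * sb)

def R(a, b):
    # R-factor: sum of absolute differences over the sum of the first sequence
    return sum(abs(x - y) for x, y in zip(a, b)) / sum(a)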
def run(args):
    table_1 = data_from_mtz(args[0])
    table_2 = data_from_mtz(args[1])

    zr1 = max(table_1["z"]) - min(table_1["z"])
    zr2 = max(table_2["z"]) - min(table_2["z"])
    if zr1 / zr2 > 1.2:
        table_2["z"] *= int(zr1 / zr2)
        print("scaled ROT values of dataset 2 by %s to match dataset 1"
              % int(zr1 / zr2))
    elif zr2 / zr1 > 1.2:
        table_1["z"] *= int(zr2 / zr1)
        print("scaled ROT values of dataset 1 by %s to match dataset 2"
              % int(zr2 / zr1))

    table_1["match_idx"] = flex.int(table_1.size(), -1)

    xy0 = flex.vec2_double(table_1["x"], table_1["y"])
    # offset the second set of positions slightly - if the values are exactly
    # the same then they are not found as nearest neighbours! e.g. trying to
    # compare dials vs aimless
    xy1 = flex.vec2_double(table_2["x"] + 1e-2, table_2["y"])
    z0 = table_1["z"]
    z1 = table_2["z"]

    zs = range(int(flex.min(z0)), int(flex.max(z0)) + 1)
    matches = 0
    for z in zs:
        sel = (z0 < z + max_z_diff) & (z0 > z - max_z_diff)
        xy = xy0.select(sel)
        table1_indices = sel.iselection()
        ann = ann_adaptor(xy.as_double().as_1d(), 2)

        sel2 = (z1 < z + max_z_diff) & (z1 > z - max_z_diff)
        xy = xy1.select(sel2)
        table2_indices = sel2.iselection()
        ann.query(xy.as_double().as_1d())

        # nn_idx is an index into table 1, i an index into table 2
        for i, nn_idx in enumerate(ann.nn):
            if math.sqrt(ann.distances[i]) < max_allowable_distance:
                table_1["match_idx"][table1_indices[nn_idx]] = table2_indices[i]
                matches += 1

    print("Table 1 size: %s" % table_1.size())
    print("Table 2 size: %s" % table_2.size())
    print("Found %s matches (searching for matches to table1 in table2)"
          % str((table_1["match_idx"] != -1).count(True)))

    indices = set(table_1["match_idx"])
    n_unique = len(indices)
    if -1 in indices:
        n_unique -= 1
    print("%s unique matches" % str(n_unique))

    correctly_matched = 0
    incorrectly_matched = 0
    for i in range(len(table_1)):
        if table_1["match_idx"][i] != -1:
            if (table_1["miller_index"][i]
                    == table_2["miller_index"][table_1["match_idx"][i]]):
                correctly_matched += 1
            else:
                incorrectly_matched += 1
                table_1["match_idx"][i] = -1

    print("N correctly matched %s" % correctly_matched)
    print("N incorrectly matched %s" % incorrectly_matched)

    report_on_non_matches(table_1, table_2, dmin)
    plot_scales(table_1, table_2)
def saturation_analysis(data_files, value_column):
    import six.moves.cPickle as pickle

    from annlib_ext import AnnAdaptor as ann_adaptor
    from dials.array_family import flex

    reference = data_files[0]
    rest = data_files[1:]

    with open(reference, "rb") as fh:
        reference_data = pickle.load(fh)

    assert value_column in reference_data

    variance_column = None
    for column in [
        "%s.variance" % value_column,
        value_column.replace("value", "variance"),
    ]:
        if column in reference_data:
            variance_column = column
            break
    assert variance_column

    # construct XYZ pixel position search target
    reference_data = strip_not_integrated(reference_data)

    # keep only data with I/sig(I) > 3 for reference
    strong = reference_data[value_column] > 3 * flex.sqrt(
        reference_data[variance_column])
    print("Keeping %d strong reflections of %d" %
          (strong.count(True), len(reference_data)))
    reference_data = reference_data.select(strong)

    xyz = reference_data["xyzcal.px"].as_double()
    ann = ann_adaptor(data=xyz, dim=3, k=1)

    for qpno, query_pickle in enumerate(rest):
        x = flex.double()
        y = flex.double()
        fout = open("matches%02d.dat" % qpno, "w")
        with open(query_pickle, "rb") as fh:
            query_data = strip_not_integrated(pickle.load(fh))
        qxyz = query_data["xyzcal.px"].as_double()
        ann.query(qxyz)
        matches = 0
        for j, refl in enumerate(query_data):
            rrefl = reference_data[ann.nn[j]]
            if refl["miller_index"] == rrefl["miller_index"]:
                fout.write("%d %d %d " % refl["miller_index"] +
                           "%f %f " % (rrefl[value_column], rrefl[variance_column]) +
                           "%f %f " % (refl[value_column], refl[variance_column]) +
                           "%f %f %f\n" % refl["xyzcal.px"])
                matches += 1
                x.append(rrefl[value_column])
                y.append(refl[value_column])
        print("For %s matched %d/%d" % (query_pickle, matches, len(query_data)))
        fout.close()

    from matplotlib import pyplot

    pyplot.scatter(x.as_numpy_array(), y.as_numpy_array())
    pyplot.show()
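# hypothetical sketch of the strip_not_integrated() helper assumed by
# saturation_analysis() above - keep only reflections flagged as integrated
# in a DIALS reflection table
def strip_not_integrated(reflections):
    return reflections.select(reflections.get_flags(reflections.flags.integrated))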
def compare_chunks(integrate_hkl, integrate_pkl, crystal_json, sweep_json, d_min=0.0):
    from annlib_ext import AnnAdaptor as ann_adaptor
    from cctbx.array_family import flex
    from dials.model.serialize import load

    sweep = load.sweep(sweep_json)

    rdx = derive_reindex_matrix(crystal_json, sweep_json, integrate_hkl)
    print("Reindex matrix:\n%d %d %d\n%d %d %d\n%d %d %d" % rdx.elems)

    uc = integrate_hkl_to_unit_cell(integrate_hkl)

    xhkl, xi, xsigi, xxyz, xlp = pull_reference(integrate_hkl, d_min=d_min)
    dhkl, di, dsigi, dxyz, dlp = pull_calculated(integrate_pkl)

    reference = flex.double()
    query = flex.double()

    for xyz in xxyz:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    for xyz in dxyz:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    # perform the match
    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    XDS = []
    DIALS = []
    HKL = []
    XYZ = []
    SIGMA_XDS = []
    SIGMA_DIALS = []
    XLP = []
    DLP = []

    # perform the analysis
    for j, hkl in enumerate(dhkl):
        c = ann.nn[j]
        if hkl == tuple(rdx * xhkl[c]):
            XDS.append(xi[c])
            DIALS.append(di[j])
            HKL.append(hkl)
            XYZ.append(dxyz[j])
            SIGMA_XDS.append(xsigi[c])
            SIGMA_DIALS.append(dsigi[j])
            XLP.append(xlp[c])
            DLP.append(dlp[j])

    print("Found %d matches" % len(XDS))

    compare = CompareIntensity(sweep, uc, HKL, XYZ, XDS, DIALS,
                               SIGMA_XDS, SIGMA_DIALS, XLP, DLP)
    # compare.plot_scale_factor_vs_resolution()
    # compare.plot_scale_factor_vs_frame_number()
    compare.plot_chunked_statistics_vs_resolution()
    compare.plot_chunked_statistics_vs_frame_number()
    compare.plot_chunked_statistics_vs_i_over_sigma()
    compare.plot_chunked_i_over_sigma_vs_frame_number()
    compare.plot_chunked_resolution_vs_frame_number()
    compare.plot_chunked_lp_vs_frame_number()
    compare.plot_scale_vs_x_y()
    compare.plot_scale_vs_i_over_sigma()
zs = range(int(flex.min(z)), int(flex.max(z)) + 1)

n = len(zs)
sij = flex.double(n * n, 0.0)
sij.reshape(flex.grid(n, n))

from annlib_ext import AnnAdaptor as ann_adaptor

for z0 in zs:
    s0 = z == (z0 + 0.5)
    xy0 = xy.select(s0)
    n0 = xy0.size()
    if n0 < 5:
        continue
    ann = ann_adaptor(xy0.as_double().as_1d(), 2)
    for z1 in zs:
        if z1 >= z0:
            break
        s1 = z == (z1 + 0.5)
        xy1 = xy.select(s1)
        n1 = xy1.size()
        if n1 < 5:
            continue
        ann.query(xy1.as_double().as_1d())
        d1 = flex.sqrt(ann.distances)
        m01 = (d1 < 5.0).count(True)
        s = m01 / (0.5 * (n0 + n1))
        sij[z0, z1] = s
        sij[z1, z0] = s
def compare_chunks(integrate_mtz, integrate_hkl):
    from annlib_ext import AnnAdaptor as ann_adaptor

    uc = integrate_mtz_to_unit_cell(integrate_mtz)

    rdx = derive_reindex_matrix(integrate_hkl, integrate_mtz)
    print(rdx)

    xhkl, xi, xsigi, xxyz = pull_reference(integrate_mtz)
    dhkl, di, dsigi, dxyz = pull_reference_xds(integrate_hkl)

    reference = flex.double()
    query = flex.double()

    for xyz in xxyz:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    for xyz in dxyz:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    # perform the match
    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    MOS = []
    XDS = []
    HKL = []

    # perform the analysis
    for j, hkl in enumerate(dhkl):
        c = ann.nn[j]
        if hkl == tuple(rdx * xhkl[c]):
            MOS.append(xi[c])
            XDS.append(di[j])
            HKL.append(hkl)

    # now compute resolution for every reflection - or at least each unique
    # Miller index...
    unique = set(HKL)

    resolutions = {}
    for hkl in unique:
        resolutions[hkl] = uc.d(hkl)

    # sort the paired observations in descending order of resolution
    sort_me = []
    for hkl, mos, xds in zip(HKL, MOS, XDS):
        sort_me.append((resolutions[hkl], mos, xds))
    sort_me.sort()
    sort_me.reverse()

    resolutions = [sm[0] for sm in sort_me]
    MOS = [sm[1] for sm in sort_me]
    XDS = [sm[2] for sm in sort_me]

    print("Paired %d observations" % len(MOS))

    chunks = [(i, i + 1000) for i in range(0, len(MOS), 1000)]

    ccs = []
    rs = []
    ss = []

    for chunk in chunks:
        mos = MOS[chunk[0]:chunk[1]]
        xds = XDS[chunk[0]:chunk[1]]
        resols = resolutions[chunk[0]:chunk[1]]

        if len(mos) < 100:
            break

        c = cc(xds, mos)
        r, s = R(xds, mos)
        print("%7d %4d %.3f %.3f %.3f %.3f %.3f" %
              (chunk[0], len(mos), min(resols), max(resols), c, r, s))
        ccs.append(c)
        rs.append(r)
        ss.append(s)

    chunks = [j for j in range(len(chunks))]

    # kludge - trim the chunk list if we fell off the end early
    chunks = chunks[:len(rs)]

    from matplotlib import pyplot

    pyplot.xlabel("Chunk")
    pyplot.ylabel("Statistic")
    pyplot.title("Statistics for 1000 reflection-pair chunks")
    pyplot.plot(chunks, ccs, label="CC")
    pyplot.plot(chunks, rs, label="R")
    pyplot.plot(chunks, ss, label="K")
    pyplot.legend()
    pyplot.savefig("plot-xds-vs-mosflm.png")
    pyplot.close()
def run(args):
    import libtbx.load_env
    from cctbx.array_family import flex
    from dials.util.options import OptionParser, flatten_reflections

    usage = "%s [options] reflections_1.pickle reflections_2.pickle" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(usage=usage,
                          phil=phil_scope,
                          read_reflections=True,
                          epilog=help_message)

    params, options, args = parser.parse_args(show_diff_phil=True,
                                              return_unhandled=True)

    reflections = flatten_reflections(params.input.reflections)

    if flex.max(reflections[0]["id"]) > 0:
        reflections = list(reversed(reflections))
    assert flex.max(reflections[0]["id"]) == 0

    assert len(reflections) == 2

    partialities = []
    intensities = []
    sigmas = []
    ids = []
    xyz = []

    # only want fully-recorded reflections in full dataset
    # reflections[0] = reflections[0].select(reflections[0]['partiality'] > 0.99)
    print(reflections[0].size())

    # only want partial reflections in sliced dataset
    # reflections[1] = reflections[1].select(reflections[1]['partiality'] < 0.99)
    print(reflections[1].size())

    for refl in reflections:
        # sel = refl.get_flags(refl.flags.integrated_sum)
        sel = refl.get_flags(refl.flags.integrated)
        sel &= refl["intensity.sum.value"] > 0
        sel &= refl["intensity.sum.variance"] > 0
        refl = refl.select(sel)

        hkl = refl["miller_index"]
        partiality = refl["partiality"]
        intensity = refl["intensity.sum.value"]
        vari = refl["intensity.sum.variance"]
        assert vari.all_gt(0)
        sigi = flex.sqrt(vari)

        intensities.append(intensity)
        partialities.append(partiality)
        sigmas.append(sigi)
        ids.append(refl["id"])
        xyz.append(refl["xyzcal.px"])

    from annlib_ext import AnnAdaptor as ann_adaptor

    ann = ann_adaptor(xyz[0].as_double().as_1d(), 3)
    ann.query(xyz[1].as_double().as_1d())
    distances = flex.sqrt(ann.distances)

    matches = distances < 2  # pixels
    isel0 = flex.size_t(list(ann.nn.select(matches)))
    isel1 = flex.size_t(list(matches.iselection()))

    p0 = partialities[0].select(isel0)
    p1 = partialities[1].select(isel1)
    i0 = intensities[0].select(isel0)
    i1 = intensities[1].select(isel1)

    print((p0 > p1).count(True), (p0 < p1).count(True))

    h0 = flex.histogram(p0, data_min=0, data_max=1, n_slots=20)
    h1 = flex.histogram(p1, data_min=0, data_max=1, n_slots=20)
    h0.show()
    h1.show()

    from matplotlib import pyplot

    perm0 = flex.sort_permutation(p0)
    perm1 = flex.sort_permutation(p1)

    fig, axes = pyplot.subplots(nrows=2, sharex=True)
    axes[0].plot(p0.select(perm0), flex.int_range(p0.size()))
    axes[1].plot(p1.select(perm1), flex.int_range(p1.size()))
    axes[1].set_xlabel("Partiality")
    for ax in axes:
        ax.set_ylabel("Cumulative frequency")
        ax.set_yscale("log")
    pyplot.savefig("sorted_partialities.png")
    pyplot.clf()

    blue = "#3498db"
    fig, axes = pyplot.subplots(nrows=2, sharex=True)
    axes[0].bar(h0.slot_centers(), h0.slots(), width=h0.slot_width(),
                align="center", color=blue, edgecolor=blue)
    axes[1].bar(h1.slot_centers(), h1.slots(), width=h1.slot_width(),
                align="center", color=blue, edgecolor=blue)
    axes[1].set_xlabel("Partiality")
    for ax in axes:
        ax.set_ylabel("Frequency")
        ax.set_yscale("log")
    pyplot.savefig("partiality_histogram.png")
    # pyplot.show()
    pyplot.clf()

    pyplot.scatter(p0, p1, s=5, alpha=0.3, marker="+")
    pyplot.xlabel("Partiality (full)")
    pyplot.ylabel("Partiality (sliced)")
    pyplot.savefig("partiality_full_vs_sliced.png")
    pyplot.clf()

    pyplot.scatter(i0, i1, s=5, alpha=0.3, marker="+")
    pyplot.xlim(flex.min(i0), flex.max(i0))
    pyplot.ylim(flex.min(i1), flex.max(i1))
    pyplot.xlabel("Intensity (full)")
    pyplot.ylabel("Intensity (sliced)")
    pyplot.xscale("log")
    pyplot.yscale("log")
    pyplot.savefig("intensity_full_vs_sliced.png")
    pyplot.clf()

    i_ratio = i1 / i0
    p_ratio = p1 / p0

    pyplot.scatter(p_ratio, i_ratio, s=5, alpha=0.3, marker="+")
    pyplot.ylim(flex.min(i_ratio), flex.max(i_ratio))
    pyplot.yscale("log")
    pyplot.xlabel("P(sliced)/P(full)")
    pyplot.ylabel("I(sliced)/I(full)")
    pyplot.savefig("partiality_ratio_vs_intensity_ratio.png")
    pyplot.clf()
def run(args): import libtbx.load_env usage = "%s [options]" %libtbx.env.dispatcher_name parser = OptionParser( usage=usage, phil=phil_scope, check_format=False, epilog=help_message) params, options, args = parser.parse_args(show_diff_phil=True, return_unhandled=True) assert len(args) == 2 from iotbx.reflection_file_reader import any_reflection_file xyz = [] intensities = [] lp_corrections = [] for f in args: xdet = None ydet = None rot = None i_sigi = None lp = None arrays = any_reflection_file(f).as_miller_arrays(merge_equivalents=False) for ma in arrays: print ma.info().labels if ma.info().labels[0] == 'XDET': xdet = ma elif ma.info().labels[0] == 'YDET': ydet = ma elif ma.info().labels[0] == 'ROT': rot = ma elif ma.info().labels == ['I', 'SIGI']: i_sigi = ma elif ma.info().labels[0] == 'LP': lp = ma assert [xdet, ydet, rot, i_sigi, lp].count(None) == 0 xyz.append(flex.vec3_double(xdet.data(), ydet.data(), rot.data())) intensities.append(i_sigi) lp_corrections.append(lp) xyz1, xyz2 = xyz xyz2 += (1e-3,1e-3,1e-3) intensities1, intensities2 = intensities lp1, lp2 = lp_corrections # Do the nn match from annlib_ext import AnnAdaptor as ann_adaptor ann = ann_adaptor(xyz1.as_double().as_1d(), 3) ann.query(xyz2.as_double().as_1d()) distances = flex.sqrt(ann.distances) matches = distances < 2 #pixels index1 = flex.size_t(list(ann.nn.select(matches))) index2 = flex.size_t(list(matches.iselection())) intensities1 = intensities1.select(index1) intensities2 = intensities2.select(index2) isigi1 = intensities1.data()/intensities1.sigmas() isigi2 = intensities2.data()/intensities2.sigmas() lp1 = lp1.select(index1) lp2 = lp2.select(index2) ##differences = intensities1.data() - intensities2.data() ##sums = intensities1.data() + intensities2.data() #differences = isigi1 - isigi2 #sums = isigi1 + isigi2 #assert sums.all_ne(0) #dos = differences/sums #mean_dos = [] #binner = intensities1.setup_binner_d_star_sq_step(d_star_sq_step=0.01) #d_spacings = intensities1.d_spacings().data() #for i in range(binner.n_bins_used()): #d_max, d_min = binner.bin_d_range(i+1) #bin_sel = (d_spacings > d_min) & (d_spacings <= d_max) #mean_dos.append(flex.mean(dos.select(bin_sel))) # set backend before importing pyplot import matplotlib matplotlib.use('Agg') from matplotlib import pyplot pyplot.scatter(intensities1.data(), intensities2.data(), marker='+', alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0,m), (0, m), c='black') pyplot.savefig('scatter_intensities.png') pyplot.clf() pyplot.scatter(intensities1.sigmas(), intensities2.sigmas(), marker='+', alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0,m), (0, m), c='black') pyplot.savefig('scatter_sigmas.png') pyplot.clf() pyplot.scatter( flex.pow2(intensities1.sigmas()), flex.pow2(intensities2.sigmas()), marker='+', alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0,m), (0, m), c='black') pyplot.savefig('scatter_variances.png') pyplot.clf() pyplot.scatter(isigi1, isigi2, marker='+', alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0,m), (0, m), c='black') pyplot.savefig('scatter_i_sig_i.png') pyplot.clf() pyplot.scatter(lp1.data(), lp2.data(), marker='+', alpha=0.5) m = max(pyplot.xlim()[1], pyplot.ylim()[1]) pyplot.plot((0,m), (0, m)) pyplot.savefig('scatter_LP.png') pyplot.clf() #from cctbx import uctbx #pyplot.scatter(uctbx.d_star_sq_as_d(binner.bin_centers(2)), mean_dos) #pyplot.savefig('mean_dos.png') #pyplot.clf() return
refl2 = flex.reflection_table.from_pickle(args[1])
mask = refl2['intensity.sum.value'] <= 0.0
refl2.del_selected(mask)
mask = refl2['intensity.sum.value']**2 < refl2['intensity.sum.variance']
refl2.del_selected(mask)
Command.end('Read %d reflections from %s' % (len(refl2), args[1]))

# perform the match
Command.start('Find matching reflections')
hkl1 = refl1['miller_index']
hkl2 = refl2['miller_index']
xyz1 = refl1['xyzcal.px']
xyz2 = refl2['xyzcal.px']

# do the nn match
ann = ann_adaptor(xyz1.as_double().as_1d(), 3)
ann.query(xyz2.as_double().as_1d())

# select only those with matching hkl
index = flex.size_t(i for i in ann.nn)
hkl11 = hkl1.select(index)
flags = hkl11 == hkl2
index = index.select(flags)
refl1 = refl1.select(index)
refl2 = refl2.select(flags)
Command.end('Found %d matching reflections' % len(refl1))

# do the comparison
compare = CompareReflections(refl1, refl2)
compare.intensities()
def reconstruct_rogues(params): assert os.path.exists("xia2.json") from xia2.Schema.XProject import XProject xinfo = XProject.from_json(filename="xia2.json") from dxtbx.model.experiment_list import ExperimentListFactory import six.moves.cPickle as pickle import dials # because WARNING:root:No profile class gaussian_rs registered crystals = xinfo.get_crystals() assert len(crystals) == 1 for xname in crystals: crystal = crystals[xname] scaler = crystal._get_scaler() epochs = scaler._sweep_handler.get_epochs() rogues = os.path.join(scaler.get_working_directory(), xname, "scale", "ROGUES") rogue_reflections = munch_rogues(rogues) batched_reflections = {} for epoch in epochs: si = scaler._sweep_handler.get_sweep_information(epoch) intgr = si.get_integrater() experiments = ExperimentListFactory.from_json_file( intgr.get_integrated_experiments()) with open(intgr.get_integrated_reflections(), "rb") as fh: reflections = pickle.load(fh) batched_reflections[si.get_batch_range()] = ( experiments, reflections, si.get_sweep_name(), ) # - look up reflection in reflection list, get bounding box # - pull pixels given from image set, flatten these, write out from dials.array_family import flex from annlib_ext import AnnAdaptor as ann_adaptor reflections_run = {} for run in batched_reflections: reflections_run[run] = [] for rogue in rogue_reflections: b = rogue[0] for run in batched_reflections: if b >= run[0] and b <= run[1]: reflections_run[run].append(rogue) break for run_no, run in enumerate(reflections_run): experiment = batched_reflections[run][0] reflections = batched_reflections[run][1] name = batched_reflections[run][2] rogues = reflections_run[run] reference = flex.double() scan = experiment.scans()[0] images = experiment.imagesets()[0] for xyz in reflections["xyzcal.px"]: reference.append(xyz[0]) reference.append(xyz[1]) reference.append(xyz[2]) search = flex.double() for rogue in rogues: search.append(rogue[1]) search.append(rogue[2]) search.append(scan.get_array_index_from_angle(rogue[3])) ann = ann_adaptor(data=reference, dim=3, k=1) ann.query(search) keep = flex.bool(len(reflections), False) for j, rogue in enumerate(rogues): keep[ann.nn[j]] = True reflections = reflections.select(keep == True) if params.extract: reflections["shoebox"] = flex.shoebox(reflections["panel"], reflections["bbox"], allocate=True) reflections.extract_shoeboxes(images, verbose=False) if len(reflections_run) > 1: output = params.output.reflections.replace(".refl", "-%s.refl" % name) print("Extracted %d rogue reflections for %s to %s" % (len(reflections), name, output)) reflections.as_pickle(output) else: output = params.output.reflections print("Extracted %d rogue reflections to %s" % (len(reflections), output)) reflections.as_pickle(output)
def compare_chunks(integrate_hkl, integrate_pkl, experiments_json, d_min=0.0):
    from annlib_ext import AnnAdaptor as ann_adaptor
    from cctbx.array_family import flex

    rdx = derive_reindex_matrix(experiments_json, integrate_hkl)
    print("Reindex matrix:\n%d %d %d\n%d %d %d\n%d %d %d" % rdx.elems)

    uc = integrate_hkl_to_unit_cell(integrate_hkl)

    xhkl, xi, xsigi, xxyz, xlp = pull_reference(integrate_hkl, d_min=d_min)
    dhkl, di, dsigi, dxyz, dlp = pull_calculated(integrate_pkl)

    reference = flex.double()
    query = flex.double()

    for xyz in xxyz:
        reference.append(xyz[0])
        reference.append(xyz[1])
        reference.append(xyz[2])

    for xyz in dxyz:
        query.append(xyz[0])
        query.append(xyz[1])
        query.append(xyz[2])

    # perform the match
    ann = ann_adaptor(data=reference, dim=3, k=1)
    ann.query(query)

    XDS = []
    DIALS = []
    HKL = []

    # perform the analysis
    for j, hkl in enumerate(dhkl):
        c = ann.nn[j]
        if hkl == tuple(rdx * xhkl[c]):
            XDS.append(xi[c])
            DIALS.append(di[j])
            HKL.append(hkl)

    # now compute resolution for every reflection - or at least each unique
    # Miller index...
    unique = set(HKL)

    resolutions = {}
    for hkl in unique:
        resolutions[hkl] = uc.d(hkl)

    # sort the paired observations in descending order of resolution
    sort_me = []
    for hkl, xds, dials in zip(HKL, XDS, DIALS):
        sort_me.append((resolutions[hkl], xds, dials))
    sort_me.sort()
    sort_me.reverse()

    resolutions = [sm[0] for sm in sort_me]
    XDS = [sm[1] for sm in sort_me]
    DIALS = [sm[2] for sm in sort_me]

    print("Paired %d observations" % len(XDS))

    chunks = [(i, i + 1000) for i in range(0, len(XDS), 1000)]

    ccs = []
    rs = []
    ss = []

    for chunk in chunks:
        xds = XDS[chunk[0]:chunk[1]]
        dials = DIALS[chunk[0]:chunk[1]]
        resols = resolutions[chunk[0]:chunk[1]]

        if len(xds) < 100:
            break

        c = cc(dials, xds)
        r, s = R(dials, xds)
        print("%7d %4d %.3f %.3f %.3f %.3f %.3f" %
              (chunk[0], len(xds), min(resols), max(resols), c, r, s))
        ccs.append(c)
        rs.append(r)
        ss.append(s)

    chunks = [j for j in range(len(chunks))]

    # kludge - trim the chunk list if we fell off the end early
    chunks = chunks[:len(rs)]

    from matplotlib import pyplot

    pyplot.xlabel("Chunk")
    pyplot.ylabel("Statistic")
    pyplot.title("Statistics for 1000 reflection-pair chunks")
    pyplot.plot(chunks, ccs, label="CC")
    pyplot.plot(chunks, rs, label="R")
    pyplot.plot(chunks, ss, label="K")
    pyplot.legend()
    pyplot.savefig("plot-vs-xds.png")
    pyplot.close()