def _analyse_periodogram(self, pgram):
    """Suggest an interval width for scan-varying refinement from pgram.

    The median of the spectrum at frequencies above 0.25 serves as the noise
    baseline, and samples exceeding five times that baseline are treated as
    peaks. The frequency at which the spectrum crosses the cutoff just after
    the last such sample is estimated by linear interpolation, and the
    corresponding cycle period (1 / frequency) is returned.

    Returns None when no sample exceeds the cutoff.
    """
    # Baseline from the high frequency noise
    baseline = flex.median(pgram.spec.select(pgram.freq > 0.25))

    # Peaks are anything greater than 5 times this baseline
    cutoff = 5 * baseline
    above_cutoff = pgram.spec > cutoff

    last_peak = flex.last_index(above_cutoff, True)
    if last_peak is None:
        return None

    f_lo = pgram.freq[last_peak]
    s_lo = pgram.spec[last_peak]
    try:
        f_hi = pgram.freq[last_peak + 1]
        s_hi = pgram.spec[last_peak + 1]
    except IndexError:
        # The last above-cutoff sample is the final one, so there is no
        # following sample to interpolate towards
        freq = f_lo
    else:
        # Linear interpolation to where the spectrum equals the cutoff
        freq = f_lo + (f_hi - f_lo) * (cutoff - s_lo) / (s_hi - s_lo)

    return 1. / freq
def plot_histograms(self, reflections, panel=None, ax=None, bounds=None):
    """Draw the histogram of residual magnitudes and summary markers on ax.

    The 'difference_vector_norms' column of ``reflections`` is binned into 20
    slots (restricted to values not exceeding
    ``self.params.residuals.histogram_max`` when that is set). Besides the
    histogram curve, vertical lines are drawn at the observed mean, the
    Rayleigh mean, the histogram mode, the observed RMSD and the Rayleigh
    RMSD; the Rayleigh values use the histogram mode as sigma. A legend is
    added only if the axes do not have one yet.

    Args:
        reflections: table providing the 'difference_vector_norms',
            'xyzcal.mm' and 'xyzobs.mm.value' columns.
        panel: not used by this method.
        ax: axes to draw on. Although it defaults to None, its ``plot``,
            ``get_legend`` and ``legend`` methods are called unconditionally,
            so a real axes object has to be supplied.
        bounds: passed unchanged to ``self.get_bounded_data`` together with
            the points to be plotted.
    """
    norms = reflections['difference_vector_norms']
    colors = ['b-', 'g-', 'g--', 'r-', 'b-', 'b--']
    n_slots = 20
    histogram_max = self.params.residuals.histogram_max
    if histogram_max is None:
        h = flex.histogram(norms, n_slots=n_slots)
    else:
        h = flex.histogram(norms.select(norms <= histogram_max), n_slots=n_slots)

    n_refl = len(reflections)
    rmsd_obs = math.sqrt(
        (reflections['xyzcal.mm'] - reflections['xyzobs.mm.value']).sum_sq() / n_refl)

    slots = h.slots()
    centers = h.slot_centers()
    peak_count = flex.max(slots)
    # The mode (centre of the most populated slot) doubles as Rayleigh sigma
    sigma = mode = centers[list(slots).index(peak_count)]
    mean_obs = flex.mean(norms)
    mean_rayleigh = math.sqrt(math.pi / 2) * sigma
    rmsd_rayleigh = math.sqrt(2) * sigma

    # Histogram curve first, followed by a two-point vertical line for each
    # statistic, in the same order as the legend entries below
    points = flex.vec2_double(list(zip(centers, slots)))
    n_curve = len(points)
    markers = [mean_obs, mean_rayleigh, mode, rmsd_obs, rmsd_rayleigh]
    for value in markers:
        points.extend(flex.vec2_double([(value, 0), (value, peak_count)]))
    points = self.get_bounded_data(points, bounds)

    # Split the bounded points back into the curve and the marker lines
    series = [points[:n_curve]]
    for k in range(len(markers)):
        start = n_curve + 2 * k
        series.append(points[start:start + 2])

    for d, c in zip(series, colors):
        parts = d.parts()
        ax.plot(parts[0], parts[1], c)

    if ax.get_legend() is None:
        ax.legend([r"$\Delta$XY", "MeanObs", "MeanRayl", "Mode", "RMSDObs", "RMSDRayl"])
def determine_best_unit_cell(experiments):
    """Return the median unit cell, for consistent d-values across experiments.

    Each of the six cell parameters is collected from the crystal of every
    experiment and the per-parameter medians define the returned cell. The
    choice is logged when more than one experiment is present.
    """
    columns = [flex.double() for _ in range(6)]
    for expt in experiments:
        cell_parameters = expt.crystal.get_unit_cell().parameters()
        for position, value in enumerate(cell_parameters):
            columns[position].append(value)

    medians = [flex.median(column) for column in columns]
    best_unit_cell = uctbx.unit_cell(parameters=medians)

    if len(experiments) > 1:
        logger.info("Using median unit cell across experiments : %s", best_unit_cell)
    return best_unit_cell
def histogram(self, reflections, title):
    """Print residual statistics and plot the histogram of their magnitudes.

    The 'difference_vector_norms' column is binned into 100 slots
    (restricted to values not exceeding
    ``self.params.residuals.histogram_max`` when that is set). The RMSD,
    histogram mode, mean, median, the Rayleigh mean and RMSD (using the mode
    as sigma) and the radial and transverse RMSDs are printed, each
    multiplied by 1000 and labelled as microns. The histogram is then plotted
    on a new figure with vertical lines marking the mean, Rayleigh mean,
    mode, RMSD and Rayleigh RMSD.

    Args:
        reflections: table providing the 'difference_vector_norms',
            'xyzcal.mm', 'xyzobs.mm.value', 'radial_displacements' and
            'transverse_displacements' columns.
        title: title given to the plot.
    """
    def report(label, value):
        # A single pre-formatted argument prints the same "label value" text
        # whether print is the Python 2 statement or the print function
        print("%s %s" % (label, value * 1000))

    data = reflections['difference_vector_norms']
    n_slots = 100
    histogram_max = self.params.residuals.histogram_max
    if histogram_max is None:
        h = flex.histogram(data, n_slots=n_slots)
    else:
        h = flex.histogram(data.select(data <= histogram_max), n_slots=n_slots)

    n = len(reflections)
    rmsd = math.sqrt(
        (reflections['xyzcal.mm'] - reflections['xyzobs.mm.value']).sum_sq() / n)
    slots = h.slots()
    peak_count = flex.max(slots)
    # The mode (centre of the most populated slot) doubles as Rayleigh sigma
    sigma = mode = h.slot_centers()[list(slots).index(peak_count)]
    mean = flex.mean(data)
    median = flex.median(data)
    report("RMSD (microns)", rmsd)
    report("Histogram mode (microns):", mode)
    report("Overall mean (microns):", mean)
    report("Overall median (microns):", median)

    mean2 = math.sqrt(math.pi / 2) * sigma
    rmsd2 = math.sqrt(2) * sigma
    report("Rayleigh Mean (microns)", mean2)
    report("Rayleigh RMSD (microns)", rmsd2)

    r = reflections['radial_displacements']
    t = reflections['transverse_displacements']
    report("Overall radial RMSD (microns)", math.sqrt(flex.sum_sq(r) / len(r)))
    report("Overall transverse RMSD (microns)", math.sqrt(flex.sum_sq(t) / len(t)))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(h.slot_centers().as_numpy_array(), slots.as_numpy_array(), '-')

    if self.params.residuals.histogram_xmax is not None:
        ax.set_xlim((0, self.params.residuals.histogram_xmax))
    if self.params.residuals.histogram_ymax is not None:
        ax.set_ylim((0, self.params.residuals.histogram_ymax))
    plt.title(title)

    # Vertical markers, in the same order as the legend entries below
    markers = (
        (mean, 'g-'),
        (mean2, 'g--'),
        (mode, 'r-'),
        (rmsd, 'b-'),
        (rmsd2, 'b--'),
    )
    for value, line_style in markers:
        ax.plot((value, value), (0, peak_count), line_style)

    ax.legend([r"$\Delta$XY", "MeanObs", "MeanRayl", "Mode", "RMSDObs", "RMSDRayl"])
    ax.set_xlabel("(mm)")
    ax.set_ylabel("Count")
def _analyse_periodogram(self, pgram):
    """Suggest an interval width for scan-varying refinement from pgram.

    The median of the spectrum at frequencies above 0.25 serves as the noise
    baseline, and samples exceeding five times that baseline are treated as
    peaks; one is expected at low frequency. The frequency at which the first
    peak drops back through the cutoff is estimated by linear interpolation
    and the cycle period at half that frequency (2 / frequency) is returned.
    This is a heuristic that often seems to give sensible results.

    Returns None when pgram is None or no sample exceeds the cutoff.
    """
    if pgram is None:
        return None

    # Baseline from the high frequency noise
    bl = flex.median(pgram.spec.select(pgram.freq > 0.25))

    # Peaks are anything greater than 5 times this baseline
    cutoff = 5 * bl
    peaks = pgram.spec > cutoff

    pk_start = flex.first_index(peaks, True)
    if pk_start is None:
        return None

    # Length of the run of above-cutoff samples that starts at pk_start
    pk_length = flex.first_index(peaks[pk_start:], False)
    if pk_length is None:
        # The peak never drops below the cutoff, so its last sample is the
        # final sample of the spectrum. Previously this case failed with a
        # TypeError from adding None to pk_start.
        idx = len(peaks) - 1
    else:
        idx = pk_start + pk_length - 1

    f1 = pgram.freq[idx]
    s1 = pgram.spec[idx]
    try:
        f2 = pgram.freq[idx + 1]
        s2 = pgram.spec[idx + 1]
    except IndexError:
        # No following sample to interpolate towards
        freq = f1
    else:
        # Linear interpolation to where the spectrum equals the cutoff
        ds = cutoff - s1
        freq = f1 + (f2 - f1) * ds / (s2 - s1)

    return 2.0 / freq
def run(args):
    """Run ``run_once`` over ``args`` in parallel and summarise the results.

    Results that are None are skipped. Two text tables are written to the
    current directory: results.txt (spot-finding statistics, one row per
    result) and results_indexed.txt (cell parameters, number of indexed
    reflections, d_min and RMSDs, one row per entry of the results'
    ``cell_params``). PDF histograms are then written for the strong spot
    counts, the numbers of indexed and integrated lattices, and the six unit
    cell parameters.

    Args:
        args: iterable of work items, each of which is passed to ``run_once``.
    """
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(
            result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(
            result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(
            result.d_strong_spots_99th_percentile)
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    table_data = [(
        "sweep_dir",
        "template",
        "#strong_spots",
        "#unindexed_spots",
        "#lattices",
        "d_spacing_50th_percentile",
        "d_spacing_95th_percentile",
        "d_spacing_99th_percentile",
    )]
    per_sweep_columns = zip(
        sweep_directories,
        templates,
        n_strong_spots,
        n_unindexed_spots,
        n_indexed_lattices,
        d_strong_spots_50th_percentile,
        d_strong_spots_95th_percentile,
        d_strong_spots_99th_percentile,
    )
    for row in per_sweep_columns:
        sweep_dir, template = row[:2]
        table_data.append(
            (sweep_dir, template) + tuple(str(value) for value in row[2:]))
    # Text mode: print() writes str, which a binary-mode file rejects on
    # Python 3
    with open("results.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"),
              file=f)

    table_data = [(
        "sweep_dir",
        "cell_a",
        "cell_b",
        "cell_c",
        "alpha",
        "beta",
        "gamma",
        "#indexed_reflections",
        "d_min_indexed",
        "rmsd_x",
        "rmsd_y",
        "rmsd_phi",
    )]
    per_crystal_columns = zip(
        sweep_dir_cryst, cell_params, n_indexed, d_min_indexed, rmsds)
    for sweep_dir, cell, n_idx, d_min, rmsd in per_crystal_columns:
        table_data.append(
            (sweep_dir,)
            + tuple(str(cell[k]) for k in range(6))
            + (str(n_idx), str(d_min))
            + tuple(str(rmsd[k]) for k in range(3))
        )
    with open("results_indexed.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"),
              file=f)

    # One flex.double per cell parameter: a, b, c, alpha, beta, gamma
    cell_columns = [
        flex.double([params[k] for params in cell_params]) for k in range(6)
    ]

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc("font", family="serif")
    pyplot.rc("font", serif="Times New Roman")

    blue = "#2166AC"

    def save_pdf(fig, filename):
        # Write fig to filename, closing the PDF even if saving fails
        pdf = PdfPages(filename)
        try:
            pdf.savefig(fig)
        finally:
            pdf.close()

    def plot_lattice_counts(counts, xlabel, filename):
        # Bar chart of integer lattice counts, one histogram slot per count
        hist = flex.histogram(counts.as_double(), n_slots=flex.max(counts))
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        n_bars = int(hist.data_max())
        ax.bar(
            range(n_bars),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            align="center",
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, n_bars))
        ax.set_xlabel(xlabel)
        ax.set_ylabel("Frequency")
        save_pdf(fig, filename)

    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlabel("Spot count")
    ax.set_ylabel("Frequency")
    save_pdf(fig, "spot_count_histogram.pdf")

    # NOTE(review): unlike the integrated lattices below, this plot is not
    # guarded against a maximum count of zero - confirm whether it should be
    plot_lattice_counts(
        n_indexed_lattices,
        "Number of indexed lattices",
        "n_indexed_lattices_histogram.pdf",
    )

    if flex.max(n_integrated_lattices) > 0:
        plot_lattice_counts(
            n_integrated_lattices,
            "Number of integrated lattices",
            "n_integrated_lattices_histogram.pdf",
        )

    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    for ax, cell_param in zip(axes.flat, cell_columns):
        flex.min_max_mean_double(cell_param).show()
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Cell parameter")
        ax.set_ylabel("Frequency")
    pyplot.tight_layout()
    save_pdf(fig, "cell_parameters.pdf")
def median_unit_cell(experiments):
    """Return a unit cell built from the median of each cell parameter.

    The six cell parameters of every crystal in ``experiments.crystals()``
    are gathered, and the per-parameter medians define the returned cell.
    """
    columns = [flex.double() for _ in range(6)]
    for crystal in experiments.crystals():
        cell_parameters = crystal.get_unit_cell().parameters()
        for position, value in enumerate(cell_parameters):
            columns[position].append(value)

    medians = [flex.median(column) for column in columns]
    return uctbx.unit_cell(parameters=medians)
def test_kapton(run_in_tmpdir):
    """Check the kapton absorption correction applied to integrated data.

    Only the kapton 2019 correction on rayonix-340 data from LCLS is
    exercised at present. The xfel_regression folder needs to be present in
    the modules directory for the test to run; otherwise it is skipped.
    """
    xfel_regression = libtbx.env.find_in_repositories(
        relative_path="xfel_regression", test=os.path.isdir)
    if not xfel_regression:
        pytest.skip("test requires xfel_regression")

    data_dir = os.path.join(xfel_regression, "kapton_test_data", "rayonix340")
    image_file = os.path.join(data_dir, "hit-20181213155134902.cbf")
    mask_file = os.path.join(data_dir, "mask_rayonix340mx_4x4.pickle")
    geom_file = os.path.join(data_dir, "experiments_000.json")

    # Create phil files for the two situations being tested
    # a. without kapton
    # b. with kapton
    stills_process_input = parse(
        "spotfinder.lookup.mask=%s\n "
        "integration.lookup.mask=%s\n "
        "input.reference_geometry=%s\n "
        "spotfinder.filter.min_spot_size=2\n "
        "spotfinder.filter.d_min=2\n "
        "spotfinder.filter.d_max=18\n "
        "spotfinder.threshold.dispersion.gain=0.46\n "
        "spotfinder.threshold.dispersion.global_threshold=100\n "
        "indexing.known_symmetry.space_group='P 21 21 21'\n "
        "indexing.known_symmetry.unit_cell='41.9 75.7 102 90 90 90'\n "
        "indexing.refinement_protocol.d_min_start=2\n "
        "integration.debug.output=True\n "
        "integration.debug.separate_files=False\n "
        "integration.debug.delete_shoeboxes=True\n "
        "profile.gaussian_rs.centroid_definition=com\n "
        % (mask_file, mask_file, geom_file)
    )

    # PHIL overrides that switch on the kapton_2019 absorption correction
    kapton_input = parse(
        """
    integration {
      absorption_correction {
        apply=True
        algorithm=kapton_2019
        fuller_kapton {
          xtal_height_above_kapton_mm {
            value=0.04
          }
          rotation_angle_deg {
            value=0.55
          }
          kapton_half_width_mm {
            value=0.665
          }
          kapton_thickness_mm {
            value=0.025
          }
          smart_sigmas=True
        }
      }
    }"""
    )

    with open("params_without_kapton.phil", "w") as fout:
        fout.write(stills_process_input.as_str())
        fout.write(
            "output.integrated_filename=without_kapton.mpack\noutput.integrated_experiments_filename=without_kapton.expt"
        )

    with open("params_with_kapton.phil", "w") as fout:
        fout.write(stills_process_input.as_str() + kapton_input.as_str())
        fout.write(
            "output.integrated_filename=with_kapton.mpack\noutput.integrated_experiments_filename=with_kapton.expt"
        )

    command_without_kapton = "dials.stills_process %s params_without_kapton.phil" % (
        image_file)
    command_with_kapton = "dials.stills_process %s params_with_kapton.phil" % (
        image_file)
    # The results of the two runs are not checked for errors here; a failed
    # run shows up when the output files are loaded below
    libtbx.easy_run.fully_buffered(
        command=command_without_kapton, stdout_splitlines=True)
    libtbx.easy_run.fully_buffered(
        command=command_with_kapton, stdout_splitlines=True)

    # Now compare the 2 experimental results
    # Currently just comparing the median values to get a sense of the effect
    # of the kapton and whether it is being applied correctly
    expt_without_kapton = ExperimentListFactory.from_json_file(
        "without_kapton.expt", check_format=False)
    refl_without_kapton = flex.reflection_table.from_file(
        "without_kapton.mpack")
    expt_with_kapton = ExperimentListFactory.from_json_file(
        "with_kapton.expt", check_format=False)
    refl_with_kapton = flex.reflection_table.from_file("with_kapton.mpack")

    runs = (
        (expt_without_kapton, refl_without_kapton),
        (expt_with_kapton, refl_with_kapton),
    )
    medians_per_run = []
    for experiments, reflections in runs:
        all_x, all_y, all_i = flex.double(), flex.double(), flex.double()
        for expt_id, experiment in enumerate(experiments):
            refls = reflections.select(reflections["id"] == expt_id)
            for panel_id, panel in enumerate(experiment.detector):
                panel_refls = refls.select(refls["panel"] == panel_id)
                x, y, z = panel_refls["xyzobs.px.value"].parts()
                intensities = panel_refls["intensity.sum.value"]
                for px_x, px_y, intensity in zip(x, y, intensities):
                    lab_x, lab_y, lab_z = panel.get_pixel_lab_coord(
                        (px_x, px_y))
                    all_x.append(lab_x)
                    all_y.append(lab_y)
                    all_i.append(intensity)
        # Median intensity in each half of the detector, split along x and y
        selections = (all_x <= 0, all_x > 0, all_y <= 0, all_y > 0)
        medians_per_run.append(
            [flex.median(all_i.select(sel)) for sel in selections])
    without_kapton_medians, with_kapton_medians = medians_per_run

    # Now compare results between uncorrected and corrected data

    # x < 0 where the kapton shadow is
    assert without_kapton_medians[0] < with_kapton_medians[0]

    # x > 0 where no kapton shadow present
    assert without_kapton_medians[1] == pytest.approx(
        with_kapton_medians[1], abs=0.1)

    # y < 0; kapton correction should average out but should be slightly higher
    assert without_kapton_medians[2] == pytest.approx(
        with_kapton_medians[2], abs=5.0)
    assert without_kapton_medians[2] < with_kapton_medians[2]

    # y > 0; kapton correction should average out but should be slightly higher
    assert without_kapton_medians[3] == pytest.approx(
        with_kapton_medians[3], abs=5.0)
    assert without_kapton_medians[3] < with_kapton_medians[3]
def run(args):
    """Run ``run_once`` over ``args`` in parallel and summarise the results.

    Results that are None are skipped. Two text tables are written to the
    current directory: results.txt (spot-finding statistics, one row per
    result) and results_indexed.txt (cell parameters, number of indexed
    reflections, d_min and RMSDs, one row per entry of the results'
    ``cell_params``). PDF histograms are then written for the strong spot
    counts, the numbers of indexed and integrated lattices, and the six unit
    cell parameters.

    Output is produced with constructs that behave the same whether or not
    ``print`` is a function, so the block is valid on Python 2 and Python 3.

    Args:
        args: iterable of work items, each of which is passed to ``run_once``.
    """
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    def write_table(filename, rows):
        # Format rows (header first) as a right-justified table in filename.
        # The file is opened in text mode because the formatted table is text.
        text = table_utils.format(rows, has_header=True, justify='right')
        with open(filename, 'w') as f:
            f.write("%s\n" % text)

    table_data = [('sweep_dir', 'template', '#strong_spots', '#unindexed_spots',
                   '#lattices', 'd_spacing_50th_percentile',
                   'd_spacing_95th_percentile', 'd_spacing_99th_percentile',)]
    per_sweep_columns = zip(
        sweep_directories,
        templates,
        n_strong_spots,
        n_unindexed_spots,
        n_indexed_lattices,
        d_strong_spots_50th_percentile,
        d_strong_spots_95th_percentile,
        d_strong_spots_99th_percentile,
    )
    for row in per_sweep_columns:
        sweep_dir, template = row[:2]
        table_data.append(
            (sweep_dir, template) + tuple(str(value) for value in row[2:]))
    write_table('results.txt', table_data)

    table_data = [('sweep_dir', 'cell_a', 'cell_b', 'cell_c', 'alpha', 'beta',
                   'gamma', '#indexed_reflections', 'd_min_indexed', 'rmsd_x',
                   'rmsd_y', 'rmsd_phi')]
    per_crystal_columns = zip(
        sweep_dir_cryst, cell_params, n_indexed, d_min_indexed, rmsds)
    for sweep_dir, cell, n_idx, d_min, rmsd in per_crystal_columns:
        table_data.append(
            (sweep_dir,)
            + tuple(str(cell[k]) for k in range(6))
            + (str(n_idx), str(d_min))
            + tuple(str(rmsd[k]) for k in range(3))
        )
    write_table('results_indexed.txt', table_data)

    # One flex.double per cell parameter: a, b, c, alpha, beta, gamma
    cell_columns = [
        flex.double([params[k] for params in cell_params]) for k in range(6)
    ]

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    blue = '#2166AC'

    def save_pdf(fig, filename):
        # Write fig to filename, closing the PDF even if saving fails
        pdf = PdfPages(filename)
        try:
            pdf.savefig(fig)
        finally:
            pdf.close()

    def plot_lattice_counts(counts, xlabel, filename):
        # Bar chart of integer lattice counts, one histogram slot per count
        hist = flex.histogram(counts.as_double(), n_slots=flex.max(counts))
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        n_bars = int(hist.data_max())
        ax.bar(range(n_bars), hist.slots(), width=0.75 * hist.slot_width(),
               align='center', color=blue, edgecolor=blue)
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, n_bars))
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        save_pdf(fig, filename)

    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75 * hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    save_pdf(fig, "spot_count_histogram.pdf")

    # NOTE(review): unlike the integrated lattices below, this plot is not
    # guarded against a maximum count of zero - confirm whether it should be
    plot_lattice_counts(n_indexed_lattices, 'Number of indexed lattices',
                        "n_indexed_lattices_histogram.pdf")

    if flex.max(n_integrated_lattices) > 0:
        plot_lattice_counts(n_integrated_lattices,
                            'Number of integrated lattices',
                            "n_integrated_lattices_histogram.pdf")

    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    for ax, cell_param in zip(axes.flat, cell_columns):
        flex.min_max_mean_double(cell_param).show()
        # A single parenthesised argument prints identically as a Python 2
        # statement and as a call to the print function
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(hist.slot_centers(), hist.slots(), width=0.75 * hist.slot_width(),
               color=blue, edgecolor=blue)
        ax.set_xlabel('Cell parameter')
        ax.set_ylabel('Frequency')
    pyplot.tight_layout()
    save_pdf(fig, "cell_parameters.pdf")