def filter_dose(self, dose_min, dose_max):
    """Trim every experiment and the reflection table to a dose window.

    ``dose_min`` and ``dose_max`` are compared directly against each scan's
    image range, so they are treated as image numbers. Each experiment's
    range is clamped to the window, then ``self._experiments`` and
    ``self._reflections`` are replaced by their sliced counterparts.

    Min/max/mean statistics of the third component of ``xyzobs.px.value``
    are shown before and after the reflections are sliced, and the number of
    surviving reflections is logged.
    """
    from dials.command_line.slice_sweep import slice_experiments, slice_reflections

    # One clamped (first_image, last_image) pair per experiment.
    image_range = []
    for expt in self._experiments:
        first_image, last_image = expt.scan.get_image_range()
        clamped = (max(dose_min, first_image), min(dose_max, last_image))
        image_range.append(clamped)

    n_refl_before = self._reflections.size()
    self._experiments = slice_experiments(self._experiments, image_range)

    # Statistics of the third xyzobs.px.value component before slicing ...
    z_before = self._reflections["xyzobs.px.value"].parts()[2]
    flex.min_max_mean_double(z_before).show()

    self._reflections = slice_reflections(self._reflections, image_range)

    # ... and after slicing.
    z_after = self._reflections["xyzobs.px.value"].parts()[2]
    flex.min_max_mean_double(z_after).show()

    n_refl_after = self._reflections.size()
    message = "%i reflections out of %i remaining after filtering for dose" % (
        n_refl_after,
        n_refl_before,
    )
    logger.info(message)
def filter_dose(self, dose_min, dose_max):
    """Restrict the data to the images inside ``[dose_min, dose_max]``.

    ``dose_min`` and ``dose_max`` are compared directly against each scan's
    image range, so they are treated as image numbers.

    Experiments whose image range does not overlap the dose window are
    dropped via ``self.select``. The remaining experiments and the
    reflection table are then sliced to the intersection of their image
    range with the window. Min/max/mean statistics of the third component of
    ``xyzobs.px.value`` are shown before and after the reflections are
    sliced, and the number of surviving reflections is logged.

    Args:
        dose_min: Lower bound of the window (inclusive).
        dose_max: Upper bound of the window (inclusive).
    """
    from dials.command_line.slice_sequence import (
        slice_experiments,
        slice_reflections,
    )

    keep_expts = []
    for expt in self._experiments:
        start, end = expt.scan.get_image_range()
        # Two closed intervals overlap iff each one starts no later than the
        # other ends. Testing only whether a window end point lies inside the
        # scan would wrongly reject scans fully contained in the window.
        if start <= dose_max and dose_min <= end:
            keep_expts.append(expt.identifier)
        else:
            logger.info(
                f"Removing experiment {expt.identifier} (image range {start, end} does not overlap with dose range)"
            )

    # NOTE(review): when nothing overlaps, no selection is made and slicing
    # proceeds on all experiments with the clamped ranges below -- confirm
    # that this is the intended behaviour.
    if keep_expts:
        logger.info(
            f"Selecting {len(keep_expts)} experiments that overlap with dose range"
        )
        self.select(keep_expts)

    # Clamp each remaining experiment's image range to the dose window.
    image_range = []
    for expt in self._experiments:
        start, end = expt.scan.get_image_range()
        image_range.append((max(dose_min, start), min(dose_max, end)))

    n_refl_before = self._reflections.size()
    self._experiments = slice_experiments(self._experiments, image_range)
    flex.min_max_mean_double(self._reflections["xyzobs.px.value"].parts()[2]).show()
    self._reflections = slice_reflections(self._reflections, image_range)
    flex.min_max_mean_double(self._reflections["xyzobs.px.value"].parts()[2]).show()
    logger.info(
        "%i reflections out of %i remaining after filtering for dose",
        self._reflections.size(),
        n_refl_before,
    )
def run(args):
    """Run ``run_once`` over ``args`` in parallel and summarise the results.

    Each element of ``args`` is handed to ``run_once`` through
    ``easy_mp.parallel_map``; results that are ``None`` are skipped. The
    collected values are written to the current working directory as:

    * ``results.txt`` -- one row per result (spot counts, lattice count and
      d-spacing percentiles),
    * ``results_indexed.txt`` -- one row per entry in ``cell_params`` (cell
      parameters, indexed-reflection count, d_min and RMSDs),
    * ``spot_count_histogram.pdf``, ``n_indexed_lattices_histogram.pdf``,
      ``n_integrated_lattices_histogram.pdf`` (only when at least one lattice
      was integrated) and ``cell_parameters.pdf``.

    Histogram summaries and the median of each cell parameter are also
    printed to standard output.

    Args:
        args: Iterable of work items, one per call to ``run_once``.
    """
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        # Per-result scalars: exactly one entry appended per result.
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
        # Per-lattice arrays: a result may contribute any number of entries.
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    table_data = [
        (
            "sweep_dir",
            "template",
            "#strong_spots",
            "#unindexed_spots",
            "#lattices",
            "d_spacing_50th_percentile",
            "d_spacing_95th_percentile",
            "d_spacing_99th_percentile",
        )
    ]
    per_result_columns = zip(
        sweep_directories,
        templates,
        n_strong_spots,
        n_unindexed_spots,
        n_indexed_lattices,
        d_strong_spots_50th_percentile,
        d_strong_spots_95th_percentile,
        d_strong_spots_99th_percentile,
    )
    for sweep_dir, template, *stats in per_result_columns:
        table_data.append((sweep_dir, template) + tuple(str(s) for s in stats))

    # Text mode: print() writes str, which a binary-mode file rejects.
    with open("results.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"), file=f)

    table_data = [
        (
            "sweep_dir",
            "cell_a",
            "cell_b",
            "cell_c",
            "alpha",
            "beta",
            "gamma",
            "#indexed_reflections",
            "d_min_indexed",
            "rmsd_x",
            "rmsd_y",
            "rmsd_phi",
        )
    ]
    for i, cell in enumerate(cell_params):
        rmsd = rmsds[i]
        table_data.append(
            (sweep_dir_cryst[i],)
            + tuple(str(cell[k]) for k in range(6))
            + (str(n_indexed[i]), str(d_min_indexed[i]))
            + tuple(str(rmsd[k]) for k in range(3))
        )

    with open("results_indexed.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"), file=f)

    cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma = (
        flex.double([params[k] for params in cell_params]) for k in range(6)
    )

    # Imported only now so the text tables above are written even when
    # matplotlib is unavailable.
    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc("font", family="serif")
    pyplot.rc("font", serif="Times New Roman")

    blue = "#2166AC"

    # Histogram of the strong-spot counts taken from n_strong_spots_dmin_4.
    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlabel("Spot count")
    ax.set_ylabel("Frequency")
    pdf = PdfPages("spot_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()

    # Histogram of indexed-lattice counts, with as many slots as the maximum.
    hist = flex.histogram(
        n_indexed_lattices.as_double(), n_slots=flex.max(n_indexed_lattices)
    )
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        range(int(hist.data_max())),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        align="center",
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlim(-0.5, hist.data_max() - 0.5)
    ax.set_xticks(range(0, int(hist.data_max())))
    ax.set_xlabel("Number of indexed lattices")
    ax.set_ylabel("Frequency")
    pdf = PdfPages("n_indexed_lattices_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()

    # Same plot for integrated lattices, skipped when none were integrated.
    if flex.max(n_integrated_lattices) > 0:
        hist = flex.histogram(
            n_integrated_lattices.as_double(),
            n_slots=flex.max(n_integrated_lattices),
        )
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(
            range(int(hist.data_max())),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            align="center",
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, int(hist.data_max())))
        ax.set_xlabel("Number of integrated lattices")
        ax.set_ylabel("Frequency")
        pdf = PdfPages("n_integrated_lattices_histogram.pdf")
        pdf.savefig(fig)
        pdf.close()

    # One panel per cell parameter on a 2x3 grid.
    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    cell_columns = (cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma)
    for ax, cell_param in zip(axes.flat, cell_columns):
        flex.min_max_mean_double(cell_param).show()
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Cell parameter")
        ax.set_ylabel("Frequency")
    pyplot.tight_layout()
    pdf = PdfPages("cell_parameters.pdf")
    pdf.savefig(fig)
    pdf.close()
def run(args):
    """Run ``run_once`` over ``args`` in parallel and summarise the results.

    Each element of ``args`` is handed to ``run_once`` through
    ``easy_mp.parallel_map``; results that are ``None`` are skipped. The
    collected values are written to the current working directory as:

    * ``results.txt`` -- one row per result (spot counts, lattice count and
      d-spacing percentiles),
    * ``results_indexed.txt`` -- one row per entry in ``cell_params`` (cell
      parameters, indexed-reflection count, d_min and RMSDs),
    * ``spot_count_histogram.pdf``, ``n_indexed_lattices_histogram.pdf``,
      ``n_integrated_lattices_histogram.pdf`` (only when at least one lattice
      was integrated) and ``cell_parameters.pdf``.

    Histogram summaries and the median of each cell parameter are also
    printed to standard output.

    Args:
        args: Iterable of work items, one per call to ``run_once``.
    """
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        # Per-result scalars: exactly one entry appended per result.
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
        # Per-lattice arrays: a result may contribute any number of entries.
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    table_data = [
        (
            'sweep_dir',
            'template',
            '#strong_spots',
            '#unindexed_spots',
            '#lattices',
            'd_spacing_50th_percentile',
            'd_spacing_95th_percentile',
            'd_spacing_99th_percentile',
        )
    ]
    per_result_columns = zip(
        sweep_directories,
        templates,
        n_strong_spots,
        n_unindexed_spots,
        n_indexed_lattices,
        d_strong_spots_50th_percentile,
        d_strong_spots_95th_percentile,
        d_strong_spots_99th_percentile,
    )
    for sweep_dir, template, *stats in per_result_columns:
        table_data.append((sweep_dir, template) + tuple(str(s) for s in stats))

    # print() is a function on Python 3 and writes str, so the file must be
    # opened in text mode rather than 'wb'.
    with open('results.txt', 'w') as f:
        print(table_utils.format(table_data, has_header=True, justify='right'), file=f)

    table_data = [
        (
            'sweep_dir',
            'cell_a',
            'cell_b',
            'cell_c',
            'alpha',
            'beta',
            'gamma',
            '#indexed_reflections',
            'd_min_indexed',
            'rmsd_x',
            'rmsd_y',
            'rmsd_phi',
        )
    ]
    for i, cell in enumerate(cell_params):
        rmsd = rmsds[i]
        table_data.append(
            (sweep_dir_cryst[i],)
            + tuple(str(cell[k]) for k in range(6))
            + (str(n_indexed[i]), str(d_min_indexed[i]))
            + tuple(str(rmsd[k]) for k in range(3))
        )

    with open('results_indexed.txt', 'w') as f:
        print(table_utils.format(table_data, has_header=True, justify='right'), file=f)

    cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma = (
        flex.double([params[k] for params in cell_params]) for k in range(6)
    )

    # Imported only now so the text tables above are written even when
    # matplotlib is unavailable.
    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    blue = '#2166AC'

    # Histogram of the strong-spot counts taken from n_strong_spots_dmin_4.
    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("spot_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()

    # Histogram of indexed-lattice counts, with as many slots as the maximum.
    hist = flex.histogram(
        n_indexed_lattices.as_double(), n_slots=flex.max(n_indexed_lattices)
    )
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        range(int(hist.data_max())),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        align='center',
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlim(-0.5, hist.data_max() - 0.5)
    ax.set_xticks(range(0, int(hist.data_max())))
    ax.set_xlabel('Number of indexed lattices')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("n_indexed_lattices_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()

    # Same plot for integrated lattices, skipped when none were integrated.
    if flex.max(n_integrated_lattices) > 0:
        hist = flex.histogram(
            n_integrated_lattices.as_double(),
            n_slots=flex.max(n_integrated_lattices),
        )
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(
            range(int(hist.data_max())),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            align='center',
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, int(hist.data_max())))
        ax.set_xlabel('Number of integrated lattices')
        ax.set_ylabel('Frequency')
        pdf = PdfPages("n_integrated_lattices_histogram.pdf")
        pdf.savefig(fig)
        pdf.close()

    # One panel per cell parameter on a 2x3 grid.
    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    cell_columns = (cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma)
    for ax, cell_param in zip(axes.flat, cell_columns):
        flex.min_max_mean_double(cell_param).show()
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel('Cell parameter')
        ax.set_ylabel('Frequency')
    pyplot.tight_layout()
    pdf = PdfPages("cell_parameters.pdf")
    pdf.savefig(fig)
    pdf.close()
def plot_uc_histograms(uc_params, outliers, steps_per_angstrom=20, plot_name='uc_histograms.png'):
    """Plot 2D and 1D histograms of the a, b and c unit-cell lengths.

    The top row holds pairwise 2D histograms (a/b, b/c, c/a) with empty bins
    masked out. The bottom row holds one 1D histogram per axis: all values in
    black, overdrawn in red by the values not flagged in ``outliers``. The
    figure is written to ``plot_name`` and then closed.

    Args:
        uc_params: Sequence whose first three items are the a, b and c values
            (flex arrays, since ``.select`` is called on them).
        outliers: Boolean selection aligned with each parameter array; it is
            inverted with ``~`` to pick the inliers.
        steps_per_angstrom: Number of 1D histogram slots per unit of the
            parameter value.
        plot_name: Path of the image file to write.
    """
    import math

    import numpy as np
    from matplotlib import pyplot as plt

    plt.style.use('ggplot')
    f, ax = plt.subplots(nrows=2, ncols=3, figsize=(12, 8))
    a, b, c = uc_params[:3]

    def uc_param_hist2d(p1, p2, ax):
        """Draw a 100x100-bin 2D histogram of p1 against p2 on ``ax``."""
        nbins = 100
        H, xedges, yedges = np.histogram2d(p1, p2, bins=nbins)
        # rot90 followed by flipud transposes H into the orientation
        # pcolormesh expects (rows along y, columns along x).
        H = np.rot90(H)
        H = np.flipud(H)
        # Mask empty bins so they are left undrawn.
        Hmasked = np.ma.masked_where(H == 0, H)
        ax.pcolormesh(xedges, yedges, Hmasked)

    uc_param_hist2d(a, b, ax[0][0])
    uc_param_hist2d(b, c, ax[0][1])
    uc_param_hist2d(c, a, ax[0][2])

    step = 1.0 / steps_per_angstrom
    for hist_ax, values in zip(ax[1], (a, b, c)):
        mmm = flex.min_max_mean_double(values)
        # Snap both limits down to the step grid.
        Amin = math.floor(mmm.min * steps_per_angstrom) / steps_per_angstrom
        Amax = math.floor(mmm.max * steps_per_angstrom) / steps_per_angstrom
        n_slots = int((Amax - Amin) * steps_per_angstrom)
        if n_slots < 1:
            # All values fall within a single step; this would otherwise
            # request a zero-slot histogram. Use one slot one step wide.
            n_slots = 1
            Amax = Amin + step
        hist = flex.histogram(values, Amin, Amax, n_slots=n_slots)
        hist_inliers = flex.histogram(
            values.select(~outliers), Amin, Amax, n_slots=n_slots
        )
        # All values in black, then inliers drawn on top in red.
        hist_ax.bar(
            hist.slot_centers(),
            hist.slots(),
            align='center',
            width=hist.slot_width(),
            zorder=10,
            color='black',
            edgecolor=None,
            linewidth=0,
        )
        hist_ax.bar(
            hist_inliers.slot_centers(),
            hist_inliers.slots(),
            align='center',
            width=hist_inliers.slot_width(),
            zorder=10,
            color='red',
            edgecolor=None,
            linewidth=0,
        )

    # Raw strings: '\A' is not a valid escape sequence in a normal literal.
    ax[0][0].set_ylabel(r'b ($\AA$)')
    ax[0][1].set_ylabel(r'c ($\AA$)')
    ax[0][2].set_ylabel(r'a ($\AA$)')
    ax[1][0].set_xlabel(r'a ($\AA$)')
    ax[1][1].set_xlabel(r'b ($\AA$)')
    ax[1][2].set_xlabel(r'c ($\AA$)')

    # Lay out before saving; doing it afterwards cannot affect the file.
    f.tight_layout()
    f.savefig(plot_name)
    plt.close(f)