def test_make_cluster_plots():
    from xfel.clustering.cluster import Cluster

    c1 = Cluster.from_iterable([
        (10.0, 10.0, 10.0, 90, 90, 90, "P1"),
        (10.1, 10.1, 10.1, 90, 90, 90, "P1"),
        (10.2, 10.2, 10.2, 90, 90, 90, "P1"),
    ])
    c2 = Cluster.from_iterable([
        (11.0, 11.0, 11.0, 90, 90, 90, "P1"),
        (11.1, 11.1, 11.1, 90, 90, 90, "P1"),
        (11.2, 11.2, 11.2, 90, 90, 90, "P1"),
        (11.3, 11.3, 11.3, 90, 90, 90, "P1"),
    ])
    clusters = [c1, c2]
    plots = make_cluster_plots(clusters)
    assert "uc_scatter_0" in plots
    assert "uc_scatter_1" in plots
    assert "uc_hist_0" in plots
    assert "uc_hist_1" in plots
    assert len(plots["uc_hist_0"]["data"]) == 3
    assert len(plots["uc_hist_0"]["data"][0]["x"]) == 3
    assert len(plots["uc_hist_1"]["data"][0]["x"]) == 4
    assert len(plots["uc_scatter_0"]["data"]) == 3
    assert len(plots["uc_scatter_0"]["data"][0]["x"]) == 3
    assert len(plots["uc_scatter_1"]["data"][0]["x"]) == 4
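# The assertions above treat each entry of `plots` as a plotly-style figure
# dict ({"data": [...], "layout": ...}); that structure is an inference from
# the test, not confirmed elsewhere. Under that assumption the plot dicts can
# be serialised directly, e.g. for later rendering in a report. The helper
# name below is hypothetical.
import json

def dump_cluster_plots(plots, path="cluster_plots.json"):
    """Hypothetical helper: write the plot dictionaries to a JSON file."""
    with open(path, "w") as fh:
        json.dump(plots, fh, indent=2)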
def run(_args):
    # _args is a parsed-args namespace, so guard on the input paths rather
    # than comparing the namespace itself to an integer.
    if not _args.dirs:
        raise IOError("Must provide location(s) of pickles")
    if _args.paths:
        ucs = Cluster.from_files(raw_input=_args.dirs, n_images=_args.n,
                                 dials=_args.dials)
    else:
        ucs = Cluster.from_directories(_args.dirs, n_images=_args.n,
                                       dials=_args.dials)

    # NB: the branching below is self-consistent only if `noplot` is
    # registered with action='store_false' (plotting on by default); that
    # registration is assumed, not shown in this snippet.
    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t, log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
        print(unit_cell_info(clusters))
    else:
        plt.figure("Andrews-Bernstein distance dendrogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(_args.t, log=_args.log, ax=ax,
                                                write_file_lists=_args.nofiles,
                                                schnell=_args.schnell,
                                                doplot=_args.noplot)
        print(unit_cell_info(clusters))
        plt.tight_layout()
        plt.show()
def __init__(self, vertices, min_common_reflections=10):
    """Extends the constructor from cluster.Cluster to describe the cluster
    as a graph.

    :param min_common_reflections: number of reflections two images must have
    in common for an edge to be created.
    """
    Cluster.__init__(self, vertices, "Graph cluster", "made as a graph")
    self.common_miller_threshold = min_common_reflections
    self.edges = self._make_edges()
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    ucs = Cluster.from_directories(_args.folders, "cluster_42")
    logging.info("Data imported.")

    # Set up mega-plot
    plt.figure(figsize=(22, 15))
    gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])
    orr_axes = [plt.subplot(gs[0, 0]), plt.subplot(gs[0, 1]),
                plt.subplot(gs[0, 2])]
    inten_axes = [plt.subplot(gs[1, 0]), plt.subplot(gs[1, 1]),
                  plt.subplot(gs[1, 2])]
    clust_ax = plt.subplot(gs[2, :])

    orr_axes = ucs.visualise_orientational_distribution(orr_axes, cbar=True)
    inten_axes = ucs.intensity_statistics(inten_axes)
    clusters, cluster_ax = ucs.ab_cluster(_args.t, log=_args.log, ax=clust_ax,
                                          schnell=_args.fast,
                                          write_file_lists=False)
    # plt.text("cluster.42 Plot Everything!")
    plt.tight_layout()
    print(unit_cell_info(clusters))
    plt.show()
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    ucs = Cluster.from_directories(_args.folders, "cxi_targt_uc",
                                   n_images=_args.n)

    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t, log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
    else:
        plt.figure("Andrews-Bernstein distance dendrogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(_args.t, log=_args.log, ax=ax,
                                                write_file_lists=_args.nofiles,
                                                schnell=_args.schnell,
                                                doplot=_args.noplot)
        plt.tight_layout()
        plt.show()

    print(unit_cell_info(clusters))
def __init__(self, experiments, reflections, dendrogram=False,
             threshold=1000, n_max=None):
    try:
        from xfel.clustering.cluster import Cluster
        from xfel.clustering.cluster_groups import unit_cell_info
    except ImportError:
        raise Sorry("clustering is not configured")
    import matplotlib.pyplot as plt

    ucs = Cluster.from_expts(refl_table=reflections, expts_list=experiments,
                             n_images=n_max)
    self.clusters, _ = ucs.ab_cluster(
        threshold=threshold,
        log=True,  # log scale
        ax=plt.gca() if dendrogram else None,
        write_file_lists=False,
        schnell=False,
        doplot=dendrogram,
    )
    print(unit_cell_info(self.clusters))
    self.clustered_frames = {
        int(c.cname.split("_")[1]): c.members for c in self.clusters
    }
    if dendrogram:
        plt.tight_layout()
        plt.show()
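# Usage sketch for the wrapper class this __init__ belongs to (the class is
# invoked as Cluster(experiments, reflections, ...) in the combine_experiments
# snippets further down). The variable names are assumptions: `experiments`
# is a dxtbx ExperimentList and `reflections` a dials flex.reflection_table
# loaded elsewhere.
#
#     clustered = Cluster(experiments, reflections,
#                         dendrogram=False, threshold=1000)
#     for cluster_id, frames in sorted(clustered.clustered_frames.items()):
#         print("cluster %d: %d frames" % (cluster_id, len(frames)))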
def __init__(self, vertices, min_common_reflections=10):
    """Extends the constructor from cluster.Cluster to describe the cluster
    as a graph.

    :param min_common_reflections: number of reflections two images must have
    in common for an edge to be created.
    """
    # Clean up old edges if this graph was made from a previous graph.
    for v in vertices:
        v.edges = []
    Cluster.__init__(self, vertices, "Graph cluster", "made as a graph")
    self.common_miller_threshold = min_common_reflections
    self.edges = self._make_edges()
    self.vert_dict = {v: i for i, v in enumerate(self.members)}
def do_cluster_analysis(crystal_symmetries, params):
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)

    if params.plot.show or params.plot.name is not None:
        if not params.plot.show:
            import matplotlib

            # https://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use("Agg")  # use a non-interactive backend
        import matplotlib.pyplot as plt

        plt.figure("Andrews-Bernstein distance dendrogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(
            params.threshold,
            log=params.plot.log,
            ax=ax,
            write_file_lists=False,
            doplot=True,
        )
        print(unit_cell_info(clusters))
        plt.tight_layout()
        if params.plot.name is not None:
            plt.savefig(params.plot.name)
        if params.plot.show:
            plt.show()
    else:
        clusters, cluster_axes = ucs.ab_cluster(params.threshold,
                                                log=params.plot.log,
                                                write_file_lists=False,
                                                doplot=False)
        print(unit_cell_info(clusters))

    return clusters
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    import logging
    from xfel.clustering.cluster import Cluster

    FORMAT = '%(levelname)s %(module)s.%(funcName)s: %(message)s'
    logging.basicConfig(level=logging.WARNING, format=FORMAT)
    cluster = Cluster.from_directories(_args.folders,
                                       'Command line visualisation')
    logging.info("data imported")
    cluster.visualise_orientational_distribution()
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    ucs = Cluster.from_directories(_args.folders, "Per-frame-Wilson")
    logging.info("Data imported.")

    fig = plt.figure(figsize=(10, 10))
    ax = plt.gca()
    ucs.members[0].plot_wilson(ax=ax)
    browser = Key_event(ax, ucs.members, fig)
    fig.canvas.mpl_connect('key_press_event', browser.key_event)
    plt.show()
def do_cluster_analysis(crystal_symmetries, params):
    try:
        from xfel.clustering.cluster import Cluster
        from xfel.clustering.cluster_groups import unit_cell_info
    except ImportError:
        raise Sorry("cluster_unit_cell requires the xfel module, which is "
                    "not available")
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)

    if params.plot.show or params.plot.name is not None:
        if not params.plot.show:
            import matplotlib

            # https://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use("Agg")  # use a non-interactive backend
        import matplotlib.pyplot as plt

        plt.figure("Andrews-Bernstein distance dendrogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(
            params.threshold,
            log=params.plot.log,
            ax=ax,
            write_file_lists=False,
            # schnell=_args.schnell,
            doplot=True,
        )
        print(unit_cell_info(clusters))
        plt.tight_layout()
        if params.plot.name is not None:
            plt.savefig(params.plot.name)
        if params.plot.show:
            plt.show()
    else:
        clusters, cluster_axes = ucs.ab_cluster(
            params.threshold,
            log=params.plot.log,
            write_file_lists=False,
            # schnell=_args.schnell,
            doplot=False,
        )
        print(unit_cell_info(clusters))

    return clusters
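# Minimal driver sketch for do_cluster_analysis. The params layout
# (threshold, plot.show, plot.log, plot.name) is taken from the attribute
# accesses above; building it from a libtbx.phil scope here is an assumption,
# and the defaults are hypothetical.
from libtbx import phil

example_phil = phil.parse("""
threshold = 5000
  .type = float
plot {
  show = False
    .type = bool
  log = False
    .type = bool
  name = None
    .type = str
}
""")

def example_cluster_run(crystal_symmetries):
    """Run the clustering with the example parameters above."""
    params = example_phil.extract()
    return do_cluster_analysis(crystal_symmetries, params)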
def unit_cell_clustering(self, plot_name=None):
    crystal_symmetries = []
    for expt in self._data_manager.experiments:
        crystal_symmetry = expt.crystal.get_crystal_symmetry(
            assert_is_compatible_unit_cell=False)
        crystal_symmetries.append(crystal_symmetry.niggli_cell())
    lattice_ids = [expt.identifier for expt in self._data_manager.experiments]

    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info

    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    if plot_name is not None:
        from matplotlib import pyplot as plt

        plt.figure("Andrews-Bernstein distance dendrogram", figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(
        self._params.unit_cell_clustering.threshold,
        log=self._params.unit_cell_clustering.log,
        write_file_lists=False,
        schnell=False,
        doplot=(plot_name is not None),
        ax=ax)
    if plot_name is not None:
        plt.tight_layout()
        plt.savefig(plot_name)
        plt.clf()
    logger.info(unit_cell_info(clusters))

    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if largest_cluster_lattice_ids is None:
            largest_cluster_lattice_ids = cluster_lattice_ids
        elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
            largest_cluster_lattice_ids = cluster_lattice_ids

    if len(largest_cluster_lattice_ids) < len(crystal_symmetries):
        logger.info(
            'Selecting subset of data sets for subsequent analysis: %s' %
            str(largest_cluster_lattice_ids))
        self._data_manager.select(largest_cluster_lattice_ids)
    else:
        logger.info('Using all data sets for subsequent analysis')
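# A more compact, behaviour-equivalent way to pick the largest cluster's
# lattice ids -- a sketch assuming `clusters` (as produced by ab_cluster
# above) is non-empty:
#
#     largest_cluster_lattice_ids = max(
#         ([m.lattice_id for m in c.members] for c in clusters), key=len)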
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    ucs = Cluster.from_directories(_args.folders, "cluster_intensity_stats")
    logging.info("Data imported.")

    plt.figure(figsize=(20, 10))
    gs = gridspec.GridSpec(3, 2, width_ratios=[1, 3])
    inten_axes = [plt.subplot(gs[0, 0]), plt.subplot(gs[1, 0]),
                  plt.subplot(gs[2, 0])]
    big_axes = plt.subplot(gs[:, 1])
    ucs.intensity_statistics(ax=inten_axes)
    ucs.all_frames_intensity_stats(ax=big_axes)
    plt.tight_layout()
    plt.show()
def run(self, iterable):
    # with Capturing() as junk_output:
    errors = []
    try:
        ucs = Cluster.from_iterable(iterable=iterable)
        clusters, _ = ucs.ab_cluster(5000,
                                     log=False,
                                     write_file_lists=False,
                                     schnell=True,
                                     doplot=False)
    except Exception as e:
        print("IOTA ERROR (CLUSTERING): ", e)
        clusters = []
        errors.append(str(e))

    info = []
    if clusters:
        for cluster in clusters:
            uc_init = unit_cell(cluster.medians)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol="P1")
            groups = lattice_symmetry.metric_subgroups(
                input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_sg = str(groups.lattice_group_info()).split("(")[0]
            best_uc = top_group["best_subsym"].unit_cell().parameters()
            uc_no_stdev = ("{:<6.2f} {:<6.2f} {:<6.2f} "
                           "{:<6.2f} {:<6.2f} {:<6.2f} "
                           "".format(best_uc[0], best_uc[1], best_uc[2],
                                     best_uc[3], best_uc[4], best_uc[5]))
            cluster_info = {
                "number": len(cluster.members),
                "pg": str(best_sg),
                "uc": uc_no_stdev,
            }
            info.append(cluster_info)
    return info, errors
def run(self, iterable):
    with Capturing() as junk_output:
        try:
            ucs = Cluster.from_iterable(iterable=iterable)
            clusters, _ = ucs.ab_cluster(5000,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=True,
                                         doplot=False)
        except Exception:
            clusters = []

    if len(clusters) > 0:
        info = []
        for cluster in clusters:
            uc_init = unit_cell(cluster.medians)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol='P1')
            groups = lattice_symmetry.metric_subgroups(
                input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_uc = top_group['best_subsym'].unit_cell().parameters()
            best_sg = top_group['best_subsym'].space_group_info()
            uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "".format(best_uc[0], best_uc[1], best_uc[2],
                                    best_uc[3], best_uc[4], best_uc[5])
            cluster_info = {
                'number': len(cluster.members),
                'pg': str(best_sg),
                'uc': uc_no_stdev
            }
            info.append(cluster_info)
    else:
        info = None
    return info
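# Standalone sketch of the symmetry-assignment step used in both run()
# variants above: reduce a P1 median cell to its highest metric symmetry.
# All calls mirror the snippets; the example cell parameters are made up.
from cctbx import crystal, uctbx
from cctbx.sgtbx import lattice_symmetry

uc_init = uctbx.unit_cell((79.1, 79.1, 38.4, 90, 90, 90))
symmetry = crystal.symmetry(unit_cell=uc_init, space_group_symbol='P1')
groups = lattice_symmetry.metric_subgroups(input_symmetry=symmetry,
                                           max_delta=3)
top_group = groups.result_groups[0]
# Highest-symmetry compatible setting and its reduced cell
print(top_group['best_subsym'].space_group_info(),
      top_group['best_subsym'].unit_cell().parameters())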
def cluster_unit_cells(self):
    input = []
    for item in self.spotfinding_info:
        if item[4] is not None:
            try:
                info_line = [float(i) for i in item[4]]
                info_line.append(item[3])
                input.append(info_line)
            except ValueError:
                pass

    with misc.Capturing() as junk_output:
        try:
            ucs = Cluster.from_iterable(iterable=input)
            clusters, _ = ucs.ab_cluster(5000,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=True,
                                         doplot=False)
        except Exception as e:
            clusters = []
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    ucs = Cluster.from_directories(_args.folders, "cxi_targt_uc")

    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t, log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
    else:
        plt.figure("Andrews-Bernstein distance dendrogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(_args.t, log=_args.log, ax=ax,
                                                write_file_lists=_args.nofiles,
                                                schnell=_args.schnell,
                                                doplot=_args.noplot)
        plt.tight_layout()
        plt.show()

    print(unit_cell_info(clusters))
def report_on_crystal_clusters(crystal_symmetries, make_plots=True):
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)
    clusters, _ = ucs.ab_cluster(5000,
                                 log=None,
                                 write_file_lists=False,
                                 doplot=False)
    cluster_plots = {}
    min_cluster_pc = 5
    threshold = math.floor((min_cluster_pc / 100) * len(crystal_symmetries))
    large_clusters = [c for c in clusters if len(c.members) > threshold]
    large_clusters.sort(key=lambda x: len(x.members), reverse=True)

    if large_clusters:
        logger.info(f"""
Unit cell clustering analysis, clusters with >{min_cluster_pc}% of the number of crystals indexed
{unit_cell_info(large_clusters)}
""")
        if make_plots:
            cluster_plots = make_cluster_plots(large_clusters)
    else:
        logger.info(
            f"No clusters found with >{min_cluster_pc}% of the number of crystals."
        )
    return cluster_plots, large_clusters
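# Usage sketch (assumes `experiments` is an ExperimentList whose crystals are
# already indexed; the symmetry construction mirrors the ssx_index run()
# further down in this file):
#
#     from cctbx import crystal
#     symmetries = [
#         crystal.symmetry(unit_cell=e.crystal.get_unit_cell(),
#                          space_group=e.crystal.get_space_group())
#         for e in experiments
#     ]
#     plots, big = report_on_crystal_clusters(symmetries, make_plots=False)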
def unit_cell_analysis(self):
    """ Calls unit cell analysis module, which uses hierarchical clustering
    (Zeldin, et al, Acta D, 2015) to split integration results according to
    detected morphological groupings (if any). Most useful with preliminary
    integration without target unit cell specified.
    """
    # Will not run clustering if only one integration result found or if
    # turned off
    if self.final_objects is None:
        self.cons_uc = None
        self.cons_pg = None
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!',
                      True)
    elif len(self.final_objects) == 1:
        unit_cell = (self.final_objects[0].final['a'],
                     self.final_objects[0].final['b'],
                     self.final_objects[0].final['c'],
                     self.final_objects[0].final['alpha'],
                     self.final_objects[0].final['beta'],
                     self.final_objects[0].final['gamma'])
        point_group = self.final_objects[0].final['sg']
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        misc.main_log(self.logfile, uc_line, True)
        self.cons_pg = point_group
        self.cons_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []

        if self.params.analysis.run_clustering:
            # run hierarchical clustering analysis
            from xfel.clustering.cluster import Cluster

            counter = 0
            threshold = self.params.analysis.cluster_threshold
            cluster_limit = self.params.analysis.cluster_limit
            if self.params.analysis.cluster_n_images > 0:
                n_images = self.params.analysis.cluster_n_images
            else:
                n_images = len(self.final_objects)
            obj_list = []
            if n_images < len(self.final_objects):
                import random

                for i in range(n_images):
                    random_number = random.randrange(0, len(self.final_objects))
                    if self.final_objects[random_number] in obj_list:
                        while self.final_objects[random_number] in obj_list:
                            random_number = random.randrange(
                                0, len(self.final_objects))
                        obj_list.append(self.final_objects[random_number])
                    else:
                        obj_list.append(self.final_objects[random_number])
            if not obj_list:
                obj_list = self.final_objects

            # Cluster from iterable (this doesn't keep filenames - bad!)
            # with Capturing() as suppressed_output:
            #     uc_iterable = []
            #     for obj in obj_list:
            #         unit_cell = (float(obj.final['a']),
            #                      float(obj.final['b']),
            #                      float(obj.final['c']),
            #                      float(obj.final['alpha']),
            #                      float(obj.final['beta']),
            #                      float(obj.final['gamma']),
            #                      obj.final['sg'])
            #         uc_iterable.append(unit_cell)
            #     ucs = Cluster.from_iterable(iterable=uc_iterable)

            # Cluster from files (slow, but will keep for now)
            ucs = Cluster.from_files(pickle_list=self.pickles)

            # Do clustering
            clusters, _ = ucs.ab_cluster(threshold=threshold,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=False,
                                         doplot=False)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL ANALYSIS '))

            # extract clustering info and add to summary output list
            if cluster_limit is None:
                if len(self.pickles) / 10 >= 10:
                    cluster_limit = 10
                else:
                    cluster_limit = len(self.pickles) // 10

            for cluster in clusters:
                sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                        key=lambda x: -1 * x[1])
                pg_nums = [pg[1] for pg in sorted_pg_comp]
                cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                if len(cluster.members) > cluster_limit:
                    counter += 1

                    # Sort clustered images by mosaicity, lowest to highest
                    cluster_filenames = [j.path for j in cluster.members]
                    clustered_objects = [
                        i for i in self.final_objects
                        if i.final['final'] in cluster_filenames
                    ]
                    sorted_cluster = sorted(clustered_objects,
                                            key=lambda i: i.final['mos'])
                    # Write to file
                    if self.params.analysis.cluster_write_files:
                        output_file = os.path.join(
                            self.output_dir,
                            "uc_cluster_{}.lst".format(counter))
                        for obj in sorted_cluster:
                            with open(output_file, 'a') as scf:
                                scf.write('{}\n'.format(obj.final['final']))
                        mark_output = os.path.basename(output_file)
                    else:
                        mark_output = '*'
                        output_file = None
                else:
                    mark_output = ''
                    output_file = None

                # Populate clustering info for GUI display
                uc_init = uctbx.unit_cell(cluster.medians)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = sgtbx.lattice_symmetry.metric_subgroups(
                    input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                best_sg = top_group['best_subsym'].space_group_info()
                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {
                    'number': len(cluster.members),
                    'pg': best_sg,
                    'uc': uc_no_stdev,
                    'filename': mark_output
                }
                self.clusters.append(cluster_info)

                # format and record output
                # TODO: How to propagate stdevs after conversion from Niggli?
                # uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) "\
                #           "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                #                       cluster.medians[0], cluster.stdevs[0],
                #                       cluster.medians[1], cluster.stdevs[1],
                #                       cluster.medians[2], cluster.stdevs[2],
                #                       cluster.medians[3], cluster.stdevs[3],
                #                       cluster.medians[4], cluster.stdevs[4],
                #                       cluster.medians[5], cluster.stdevs[5],
                #                       mark_output)
                # uc_table.append(uc_line)
                uc_table.append("{:<6}: {} {}".format(len(cluster.members),
                                                      uc_no_stdev,
                                                      mark_output))
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                # uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                #            output_file, uc_line, lattices]
                uc_info = [
                    len(cluster.members), best_sg, best_uc, output_file,
                    uc_no_stdev, lattices
                ]
                uc_summary.append(uc_info)

        else:
            # generate average unit cell
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL AVERAGING (no clustering) '))
            uc_a = [i.final['a'] for i in self.final_objects]
            uc_b = [i.final['b'] for i in self.final_objects]
            uc_c = [i.final['c'] for i in self.final_objects]
            uc_alpha = [i.final['alpha'] for i in self.final_objects]
            uc_beta = [i.final['beta'] for i in self.final_objects]
            uc_gamma = [i.final['gamma'] for i in self.final_objects]
            uc_sg = [i.final['sg'] for i in self.final_objects]
            cons_pg = Counter(uc_sg).most_common(1)[0][0]
            all_pgs = Counter(uc_sg).most_common()
            unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                         np.median(uc_alpha), np.median(uc_beta),
                         np.median(uc_gamma))

            # Populate clustering info for GUI display
            uc_init = uctbx.unit_cell(unit_cell)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol='P1')
            groups = sgtbx.lattice_symmetry.metric_subgroups(
                input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_uc = top_group['best_subsym'].unit_cell().parameters()
            best_sg = top_group['best_subsym'].space_group_info()
            uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "".format(best_uc[0], best_uc[1], best_uc[2],
                                    best_uc[3], best_uc[4], best_uc[5])
            cluster_info = {
                'number': len(self.final_objects),
                'pg': best_sg,
                'uc': uc_no_stdev,
                'filename': None
            }
            self.clusters.append(cluster_info)
            uc_table.append(uc_no_stdev)
            lattices = ', '.join(
                ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
            uc_info = [
                len(self.final_objects), best_sg, best_uc, None, uc_no_stdev,
                lattices
            ]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')
        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                        ''.format(uc_pick[5]))
        self.cons_pg = uc_pick[1]
        self.cons_uc = uc_pick[2]

        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            misc.main_log(self.logfile, item, (not self.gui_mode))

    self.analysis_result.__setattr__('clusters', self.clusters)
    self.analysis_result.__setattr__('cons_pg', self.cons_pg)
    self.analysis_result.__setattr__('cons_uc', self.cons_uc)

    if self.gui_mode:
        return self.clusters
def unit_cell_analysis(self, write_files=True):
    """ Calls unit cell analysis module, which uses hierarchical clustering
    (Zeldin, et al, Acta D, 2015) to split integration results according to
    detected morphological groupings (if any). Most useful with preliminary
    integration without target unit cell specified.
    """
    # Will not run clustering if only one integration result found or if
    # turned off
    if self.final_objects is None:
        self.cons_uc = None
        self.cons_pg = None
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!',
                      True)
    elif len(self.final_objects) == 1:
        unit_cell = (self.final_objects[0].final['a'],
                     self.final_objects[0].final['b'],
                     self.final_objects[0].final['c'],
                     self.final_objects[0].final['alpha'],
                     self.final_objects[0].final['beta'],
                     self.final_objects[0].final['gamma'])
        point_group = self.final_objects[0].final['sg']
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        misc.main_log(self.logfile, uc_line, True)
        self.cons_pg = point_group
        self.cons_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []

        if self.params.analysis.run_clustering:
            # run hierarchical clustering analysis
            from xfel.clustering.cluster import Cluster

            counter = 0
            ucs = Cluster.from_files(pickle_list=self.pickles, use_b=True)
            clusters, _ = ucs.ab_cluster(
                self.params.analysis.cluster_threshold,
                log=False,
                write_file_lists=False,
                schnell=False,
                doplot=False)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL ANALYSIS '))

            # extract clustering info and add to summary output list
            if len(self.pickles) / 10 >= 10:
                cluster_limit = 10
            else:
                cluster_limit = len(self.pickles) // 10

            for cluster in clusters:
                sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                        key=lambda x: -1 * x[1])
                pg_nums = [pg[1] for pg in sorted_pg_comp]
                cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                if len(cluster.members) > cluster_limit:
                    counter += 1

                    # Sort clustered images by mosaicity, lowest to highest
                    cluster_filenames = [j.path for j in cluster.members]
                    clustered_objects = [
                        i for i in self.final_objects
                        if i.final['final'] in cluster_filenames
                    ]
                    sorted_cluster = sorted(clustered_objects,
                                            key=lambda i: i.final['mos'])
                    # Write to file
                    if write_files:
                        output_file = os.path.join(
                            self.output_dir,
                            "uc_cluster_{}.lst".format(counter))
                        for obj in sorted_cluster:
                            with open(output_file, 'a') as scf:
                                scf.write('{}\n'.format(obj.final['final']))
                        mark_output = os.path.basename(output_file)
                    else:
                        mark_output = '*'
                        output_file = None

                    # Populate clustering info for GUI display
                    uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "".format(cluster.medians[0],
                                            cluster.medians[1],
                                            cluster.medians[2],
                                            cluster.medians[3],
                                            cluster.medians[4],
                                            cluster.medians[5])
                    cluster_info = {
                        'number': len(cluster.members),
                        'pg': cons_pg[0],
                        'uc': uc_no_stdev,
                        'filename': mark_output
                    }
                    self.clusters.append(cluster_info)
                else:
                    mark_output = ''
                    output_file = None

                # format and record output
                uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) "\
                          "{}".format('({})'.format(len(cluster.members)),
                                      cons_pg[0],
                                      cluster.medians[0], cluster.stdevs[0],
                                      cluster.medians[1], cluster.stdevs[1],
                                      cluster.medians[2], cluster.stdevs[2],
                                      cluster.medians[3], cluster.stdevs[3],
                                      cluster.medians[4], cluster.stdevs[4],
                                      cluster.medians[5], cluster.stdevs[5],
                                      mark_output)
                uc_table.append(uc_line)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                uc_info = [
                    len(cluster.members), cons_pg[0], cluster.medians,
                    output_file, uc_line, lattices
                ]
                uc_summary.append(uc_info)

        else:
            # generate average unit cell
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL AVERAGING (no clustering) '))
            uc_a = [i.final['a'] for i in self.final_objects]
            uc_b = [i.final['b'] for i in self.final_objects]
            uc_c = [i.final['c'] for i in self.final_objects]
            uc_alpha = [i.final['alpha'] for i in self.final_objects]
            uc_beta = [i.final['beta'] for i in self.final_objects]
            uc_gamma = [i.final['gamma'] for i in self.final_objects]
            uc_sg = [i.final['sg'] for i in self.final_objects]
            cons_pg = Counter(uc_sg).most_common(1)[0][0]
            all_pgs = Counter(uc_sg).most_common()
            uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) " \
                      "{}".format('({})'.format(len(self.final_objects)),
                                  cons_pg,
                                  np.median(uc_a), np.std(uc_a),
                                  np.median(uc_b), np.std(uc_b),
                                  np.median(uc_c), np.std(uc_c),
                                  np.median(uc_alpha), np.std(uc_alpha),
                                  np.median(uc_beta), np.std(uc_beta),
                                  np.median(uc_gamma), np.std(uc_gamma), '')
            unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                         np.median(uc_alpha), np.median(uc_beta),
                         np.median(uc_gamma))
            uc_table.append(uc_line)
            lattices = ', '.join(
                ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
            uc_info = [
                len(self.final_objects), cons_pg, unit_cell, None, uc_line,
                lattices
            ]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')
        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                        ''.format(uc_pick[5]))
        self.cons_pg = uc_pick[1]
        self.cons_uc = uc_pick[2]

        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            misc.main_log(self.logfile, item, (not self.gui_mode))

    if self.gui_mode:
        return self.cons_pg, self.cons_uc, self.clusters
def run(self):
    '''Execute the script.'''
    from dials.util.options import flatten_experiments
    from libtbx.utils import Sorry

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the models and data
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry("The number of input reflections files does not match the "
                    "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging
            if params.reference_from_experiment.average_hierarchy_level is None or \
                    depth == params.reference_from_experiment.average_hierarchy_level:
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthogonalize the slow and the fast vectors by rotating
                # around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)

            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(target_pg,
                                      [pg[i] for pg in panelgroups],
                                      depth + 1)

        ref_detector = flat_exps[0].detector
        average_detectors(ref_detector.hierarchy(),
                          [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(beam=ref_beam,
                                   goniometer=ref_goniometer,
                                   scan=ref_scan,
                                   crystal=ref_crystal,
                                   detector=ref_detector,
                                   params=params)

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment_list import ExperimentList
    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            sel = refs['id'] == i
            sub_ref = refs.select(sel)
            nrefs_per_exp.append(len(sub_ref))
            sub_ref['id'] = flex.int(len(sub_ref), global_id)
            if params.output.delete_shoeboxes and 'shoebox' in sub_ref:
                del sub_ref['shoebox']
            reflections.extend(sub_ref)
            experiments.append(combine(exp))
            global_id += 1

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if params.output.n_subset is not None and \
            len(experiments) > params.output.n_subset:
        import random
        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        n_picked = 0
        indices = list(range(len(experiments)))
        while n_picked < params.output.n_subset:
            idx = indices.pop(random.randint(0, len(indices) - 1))
            subset_exp.append(experiments[idx])
            refls = reflections.select(reflections['id'] == idx)
            refls['id'] = flex.int(len(refls), n_picked)
            subset_refls.extend(refls)
            n_picked += 1
        experiments = subset_exp
        reflections = subset_refls

    def save_output(experiments, reflections, exp_name, refl_name):
        # save output
        from dxtbx.model.experiment_list import ExperimentListDumper
        print('Saving combined experiments to {0}'.format(exp_name))
        dump = ExperimentListDumper(experiments)
        dump.as_json(exp_name)
        print('Saving combined reflections to {0}'.format(refl_name))
        reflections.as_pickle(refl_name)

    def save_in_batches(experiments, reflections, exp_name, refl_name,
                        batch_size=1000):
        from dxtbx.command_line.image_average import splitit
        import os

        for i, indices in enumerate(
                splitit(list(range(len(experiments))),
                        (len(experiments) // batch_size) + 1)):
            batch_expts = ExperimentList()
            batch_refls = flex.reflection_table()
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(reflections['id'] == sub_idx)
                sub_refls['id'] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
            exp_filename = os.path.splitext(exp_name)[0] + "_%03d.json" % i
            ref_filename = os.path.splitext(refl_name)[0] + "_%03d.pickle" % i
            save_output(batch_expts, batch_refls, exp_filename, ref_filename)

    def combine_in_clusters(experiments_l, reflections_l, exp_name, refl_name,
                            end_count):
        import os

        result = []
        for cluster in range(len(experiments_l)):
            cluster_expts = ExperimentList()
            cluster_refls = flex.reflection_table()
            for i in range(len(experiments_l[cluster])):
                refls = reflections_l[cluster][i]
                expts = experiments_l[cluster][i]
                refls['id'] = flex.int(len(refls), i)
                cluster_expts.append(expts)
                cluster_refls.extend(refls)
            exp_filename = os.path.splitext(exp_name)[0] + \
                ("_cluster%d.json" % (end_count - cluster))
            ref_filename = os.path.splitext(refl_name)[0] + \
                ("_cluster%d.pickle" % (end_count - cluster))
            result.append((cluster_expts, cluster_refls, exp_filename,
                           ref_filename))
        return result

    # cluster the resulting experiments if requested
    if params.clustering.use:
        clustered = Cluster(experiments, reflections,
                            dendrogram=params.clustering.dendrogram,
                            threshold=params.clustering.threshold,
                            n_max=params.clustering.max_crystals)
        n_clusters = len(clustered.clustered_frames)
        if params.clustering.max_clusters is not None:
            not_too_many = lambda keeps: \
                len(keeps) < params.clustering.max_clusters
        else:
            not_too_many = lambda keeps: True
        keep_frames = []
        sorted_keys = sorted(clustered.clustered_frames.keys())
        while len(clustered.clustered_frames) > 0 and not_too_many(keep_frames):
            keep_frames.append(
                clustered.clustered_frames.pop(sorted_keys.pop(-1)))
        if params.clustering.exclude_single_crystal_clusters:
            keep_frames = [k for k in keep_frames if len(k) > 1]
        clustered_experiments = [[f.experiment for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        clustered_reflections = [[f.reflections for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        list_of_combined = combine_in_clusters(
            clustered_experiments, clustered_reflections,
            params.output.experiments_filename,
            params.output.reflections_filename, n_clusters)
        for i in range(len(list_of_combined)):
            savable_tuple = list_of_combined[i]
            if params.output.max_batch_size is None:
                save_output(*savable_tuple)
            else:
                save_in_batches(*savable_tuple,
                                batch_size=params.output.max_batch_size)
    else:
        if params.output.max_batch_size is None:
            save_output(experiments, reflections,
                        params.output.experiments_filename,
                        params.output.reflections_filename)
        else:
            save_in_batches(experiments, reflections,
                            params.output.experiments_filename,
                            params.output.reflections_filename,
                            batch_size=params.output.max_batch_size)
    return
def run(args: List[str] = None, phil: phil.scope = phil_scope) -> None:
    parser = OptionParser(
        usage="",
        read_experiments=True,
        read_reflections=True,
        phil=phil,
        check_format=False,
        epilog=__doc__,
    )
    params, _ = parser.parse_args(args=args, show_diff_phil=False)

    if not params.input.experiments or not params.input.reflections:
        parser.print_help()
        sys.exit()

    reflections, experiments = reflections_and_experiments_from_files(
        params.input.reflections, params.input.experiments)

    log.config(verbosity=1, logfile=params.output.log)
    logger.info(dials_version())

    diff_phil = parser.diff_phil.as_str()
    if diff_phil:
        logger.info("The following parameters have been modified:\n%s",
                    diff_phil)

    st = time.time()
    indexed_experiments, indexed_reflections, summary_data = index(
        experiments, reflections[0], params)

    # print some clustering information
    ucs = Cluster.from_crystal_symmetries([
        crystal.symmetry(
            unit_cell=expt.crystal.get_unit_cell(),
            space_group=expt.crystal.get_space_group(),
        ) for expt in indexed_experiments
    ])
    clusters, _ = ucs.ab_cluster(5000,
                                 log=None,
                                 write_file_lists=False,
                                 doplot=False)
    cluster_plots = {}
    threshold = math.floor(0.05 * len(indexed_experiments))
    large_clusters = [c for c in clusters if len(c.members) > threshold]
    large_clusters.sort(key=lambda x: len(x.members), reverse=True)

    if large_clusters:
        logger.info(
            "\nUnit cell clustering analysis, clusters with >5% of the number "
            "of crystals indexed\n" + unit_cell_info(large_clusters))
        if params.output.html or params.output.json:
            cluster_plots = make_cluster_plots(large_clusters)
    else:
        logger.info("No clusters found with >5% of the number of crystals.")

    logger.info(f"Saving indexed experiments to {params.output.experiments}")
    indexed_experiments.as_file(params.output.experiments)
    logger.info(f"Saving indexed reflections to {params.output.reflections}")
    indexed_reflections.as_file(params.output.reflections)

    if params.output.html or params.output.json:
        summary_plots = generate_plots(summary_data)
        if cluster_plots:
            summary_plots.update(cluster_plots)
        if params.output.html:
            generate_html_report(summary_plots, params.output.html)
        if params.output.json:
            with open(params.output.json, "w") as outfile:
                json.dump(summary_plots, outfile)

    logger.info(f"Total time: {time.time() - st:.2f}s")
def run_with_preparsed(self, params, options):
    """Run combine_experiments, but allow passing in of parameters"""
    from dials.util.options import flatten_experiments

    # Try to load the models and data
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry("The number of input reflections files does not match the "
                    "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging
            if (params.reference_from_experiment.average_hierarchy_level is None
                    or depth ==
                    params.reference_from_experiment.average_hierarchy_level):
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthogonalize the slow and the fast vectors by rotating
                # around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)

            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(target_pg,
                                      [pg[i] for pg in panelgroups],
                                      depth + 1)

        ref_detector = flat_exps[0].detector
        average_detectors(ref_detector.hierarchy(),
                          [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(
        beam=ref_beam,
        goniometer=ref_goniometer,
        scan=ref_scan,
        crystal=ref_crystal,
        detector=ref_detector,
        params=params,
    )

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    skipped_expts = 0
    from dxtbx.model.experiment_list import ExperimentList
    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            sel = refs["id"] == i
            sub_ref = refs.select(sel)
            n_sub_ref = len(sub_ref)
            if (params.output.min_reflections_per_experiment is not None
                    and n_sub_ref <
                    params.output.min_reflections_per_experiment):
                skipped_expts += 1
                continue
            nrefs_per_exp.append(n_sub_ref)
            sub_ref["id"] = flex.int(len(sub_ref), global_id)
            if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                del sub_ref["shoebox"]
            reflections.extend(sub_ref)
            try:
                experiments.append(combine(exp))
            except ComparisonError as e:
                # When we failed tolerance checks, give a useful error message
                (path, index) = find_experiment_in(exp,
                                                   params.input.experiments)
                raise Sorry(
                    "Model didn't match reference within required tolerance for experiment {} in {}:"
                    "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                    .format(index, path, str(e)))
            global_id += 1

    if (params.output.min_reflections_per_experiment is not None
            and skipped_expts > 0):
        print("Removed {0} experiments with fewer than {1} reflections".format(
            skipped_expts, params.output.min_reflections_per_experiment))

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Number of reflections"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a subset if requested
    if (params.output.n_subset is not None
            and len(experiments) > params.output.n_subset):
        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        if params.output.n_subset_method == "random":
            n_picked = 0
            indices = list(range(len(experiments)))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            print("Selecting a random subset of {0} experiments out of {1} "
                  "total.".format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "n_refl":
            if params.output.n_refl_panel_list is None:
                refls_subset = reflections
            else:
                sel = flex.bool(len(reflections), False)
                for p in params.output.n_refl_panel_list:
                    sel |= reflections["panel"] == p
                refls_subset = reflections.select(sel)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
            print("Selecting a subset of {0} experiments with highest number "
                  "of reflections out of {1} total."
                  .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "significance_filter":
            from dials.algorithms.integration.stills_significance_filter import (
                SignificanceFilter,
            )

            params.output.significance_filter.enable = True
            sig_filter = SignificanceFilter(params.output)
            refls_subset = sig_filter(experiments, reflections)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
        experiments = subset_exp
        reflections = subset_refls

    def save_in_batches(experiments, reflections, exp_name, refl_name,
                        batch_size=1000):
        from dxtbx.command_line.image_average import splitit

        for i, indices in enumerate(
                splitit(list(range(len(experiments))),
                        (len(experiments) // batch_size) + 1)):
            batch_expts = ExperimentList()
            batch_refls = flex.reflection_table()
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(reflections["id"] == sub_idx)
                sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
            exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
            ref_filename = os.path.splitext(refl_name)[0] + "_%03d.refl" % i
            self._save_output(batch_expts, batch_refls, exp_filename,
                              ref_filename)

    def combine_in_clusters(experiments_l, reflections_l, exp_name, refl_name,
                            end_count):
        result = []
        for cluster, experiment in enumerate(experiments_l):
            cluster_expts = ExperimentList()
            cluster_refls = flex.reflection_table()
            for i, expts in enumerate(experiment):
                refls = reflections_l[cluster][i]
                refls["id"] = flex.int(len(refls), i)
                cluster_expts.append(expts)
                cluster_refls.extend(refls)
            exp_filename = os.path.splitext(exp_name)[0] + \
                ("_cluster%d.expt" % (end_count - cluster))
            ref_filename = os.path.splitext(refl_name)[0] + \
                ("_cluster%d.refl" % (end_count - cluster))
            result.append((cluster_expts, cluster_refls, exp_filename,
                           ref_filename))
        return result

    # cluster the resulting experiments if requested
    if params.clustering.use:
        clustered = Cluster(
            experiments,
            reflections,
            dendrogram=params.clustering.dendrogram,
            threshold=params.clustering.threshold,
            n_max=params.clustering.max_crystals,
        )
        n_clusters = len(clustered.clustered_frames)

        def not_too_many(keeps):
            if params.clustering.max_clusters is not None:
                return len(keeps) < params.clustering.max_clusters
            return True

        keep_frames = []
        sorted_keys = sorted(clustered.clustered_frames.keys())
        while len(clustered.clustered_frames) > 0 and not_too_many(keep_frames):
            keep_frames.append(
                clustered.clustered_frames.pop(sorted_keys.pop(-1)))
        if params.clustering.exclude_single_crystal_clusters:
            keep_frames = [k for k in keep_frames if len(k) > 1]
        clustered_experiments = [[f.experiment for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        clustered_reflections = [[f.reflections for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        list_of_combined = combine_in_clusters(
            clustered_experiments,
            clustered_reflections,
            params.output.experiments_filename,
            params.output.reflections_filename,
            n_clusters,
        )
        for saveable_tuple in list_of_combined:
            if params.output.max_batch_size is None:
                self._save_output(*saveable_tuple)
            else:
                save_in_batches(*saveable_tuple,
                                batch_size=params.output.max_batch_size)
    else:
        if params.output.max_batch_size is None:
            self._save_output(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
            )
        else:
            save_in_batches(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                batch_size=params.output.max_batch_size,
            )
    return
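# Sketch of the clustering-related PHIL options exercised by
# run_with_preparsed above. The parameter names are taken from the attribute
# accesses in the code; the defaults shown here are hypothetical, and the
# authoritative definitions live in the program's own phil scope.
clustering_phil_sketch = """
clustering {
  use = False
  dendrogram = False
  threshold = 1000
  max_crystals = None
  max_clusters = None
  exclude_single_crystal_clusters = False
}
"""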
def unit_cell_analysis(self, cluster_threshold, output_dir, write_files=True):
    """ Calls unit cell analysis module, which uses hierarchical clustering
    (Zeldin, et al, Acta D, 2015) to split integration results according to
    detected morphological groupings (if any). Most useful with preliminary
    integration without target unit cell specified.
    """
    # Will not run clustering if only one integration result found
    if len(self.final_objects) == 1:
        unit_cell = (self.final_objects[0].final['a'],
                     self.final_objects[0].final['b'],
                     self.final_objects[0].final['c'],
                     self.final_objects[0].final['alpha'],
                     self.final_objects[0].final['beta'],
                     self.final_objects[0].final['gamma'])
        point_group = self.final_objects[0].final['sg']
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        misc.main_log(self.logfile, uc_line, True)
        self.cons_pg = point_group
        self.cons_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []
        counter = 1

        # run hierarchical clustering analysis
        ucs = Cluster.from_files(self.pickles, use_b=True)
        clusters, _ = ucs.ab_cluster(cluster_threshold,
                                     log=False,
                                     write_file_lists=False,
                                     schnell=False,
                                     doplot=False)
        uc_table.append("\n\n{:-^80}\n"
                        "".format(' UNIT CELL ANALYSIS '))

        # extract clustering info and add to summary output list
        for cluster in clusters:
            sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                    key=lambda x: -1 * x[1])
            pg_nums = [pg[1] for pg in sorted_pg_comp]
            cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

            output_file = os.path.join(output_dir,
                                       "uc_cluster_{}.lst".format(counter))

            # write out lists of the output pickles that comprise clusters
            # with more than one member
            if len(cluster.members) > 1:
                counter += 1

                # Sort clustered images by mosaicity, lowest to highest
                cluster_filenames = [j.path for j in cluster.members]
                clustered_objects = [
                    i for i in self.final_objects
                    if i.final['final'] in cluster_filenames
                ]
                sorted_cluster = sorted(clustered_objects,
                                        key=lambda i: i.final['mos'])
                # Write to file
                if write_files:
                    for obj in sorted_cluster:
                        with open(output_file, 'a') as scf:
                            scf.write('{}\n'.format(obj.final['final']))
                    mark_output = os.path.basename(output_file)
                else:
                    mark_output = '*'
                    output_file = None
            else:
                mark_output = ''
                output_file = None

            # format and record output
            uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) "\
                      "{}".format('({})'.format(len(cluster.members)),
                                  cons_pg[0],
                                  cluster.medians[0], cluster.stdevs[0],
                                  cluster.medians[1], cluster.stdevs[1],
                                  cluster.medians[2], cluster.stdevs[2],
                                  cluster.medians[3], cluster.stdevs[3],
                                  cluster.medians[4], cluster.stdevs[4],
                                  cluster.medians[5], cluster.stdevs[5],
                                  mark_output)
            uc_table.append(uc_line)
            uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                       output_file, uc_line]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')
        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        self.cons_pg = uc_pick[1]
        self.cons_uc = uc_pick[2]

        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            misc.main_log(self.logfile, item, True)
def unit_cell_analysis(self):
    """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin, et al., Acta D, 2015) to split integration results according
        to detected morphological groupings (if any). Most useful with
        preliminary integration without a target unit cell specified. """

    # Will not run clustering if only one integration result found or if
    # clustering is turned off
    if not self.info.categories['integrated']:
        util.main_log(self.info.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        util.main_log(self.info.logfile,
                      '\n UNIT CELL CANNOT BE DETERMINED!', True)

    elif len(self.info.categories['integrated']) == 1:
        # Each entry is a 7-tuple (a, b, c, alpha, beta, gamma, point group);
        # slice out all six cell parameters, then take the point group
        unit_cell = self.info.cluster_iterable[0][:6]
        point_group = self.info.cluster_iterable[0][6]
        util.main_log(self.info.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        util.main_log(self.info.logfile, uc_line, True)
        self.info.best_pg = str(point_group)
        self.info.best_uc = unit_cell

    else:
        uc_table = []
        uc_summary = []

        if self.params.analysis.clustering.flag_on:
            # Run hierarchical clustering analysis
            from xfel.clustering.cluster import Cluster

            counter = 0
            self.info.clusters = []

            threshold = self.params.analysis.clustering.threshold
            cluster_limit = self.params.analysis.clustering.limit
            final_pickles = self.info.categories['integrated'][0]

            pickles = []
            if self.params.analysis.clustering.n_images > 0:
                # Draw a random subset of n_images pickles, without
                # replacement
                import random
                for i in range(self.params.analysis.clustering.n_images):
                    random_number = random.randrange(0, len(final_pickles))
                    while final_pickles[random_number] in pickles:
                        random_number = random.randrange(0,
                                                         len(final_pickles))
                    pickles.append(final_pickles[random_number])
            else:
                pickles = final_pickles

            # Cluster from files (slow, but will keep for now)
            ucs = Cluster.from_files(pickle_list=pickles)

            # Do clustering
            clusters, _ = ucs.ab_cluster(threshold=threshold,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=False,
                                         doplot=False)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL ANALYSIS '))

            # Extract clustering info and add to summary output list;
            # cap the reporting limit at 10 clusters (integer division)
            if cluster_limit is None:
                cluster_limit = min(10, len(pickles) // 10)

            for cluster in clusters:
                sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                        key=lambda x: -1 * x[1])
                pg_nums = [pg[1] for pg in sorted_pg_comp]
                cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                if len(cluster.members) > cluster_limit:
                    counter += 1

                    # Write cluster member filenames to a list file
                    cluster_filenames = [j.path for j in cluster.members]
                    if self.params.analysis.clustering.write_files:
                        output_file = os.path.join(
                            self.info.int_base,
                            "uc_cluster_{}.lst".format(counter))
                        for fn in cluster_filenames:
                            with open(output_file, 'a') as scf:
                                scf.write('{}\n'.format(fn))
                        mark_output = os.path.basename(output_file)
                    else:
                        mark_output = '*'
                        output_file = None
                else:
                    mark_output = ''
                    output_file = None

                # Populate clustering info for GUI display
                uc_init = uctbx.unit_cell(cluster.medians)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = metric_subgroups(input_symmetry=symmetry,
                                          max_delta=3)
                top_group = groups.result_groups[0]
                best_sg = str(groups.lattice_group_info()).split('(')[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {'number': len(cluster.members),
                                'pg': best_sg,
                                'uc': uc_no_stdev,
                                'filename': mark_output}
                self.info.clusters.append(cluster_info)

                # Format and record output
                # TODO: How to propagate stdevs after conversion from Niggli?
                uc_table.append("{:<6}: {} {}".format(len(cluster.members),
                                                      uc_no_stdev,
                                                      mark_output))
                lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                      for i in sorted_pg_comp])
                uc_info = [len(cluster.members), best_sg, best_uc,
                           output_file, uc_no_stdev, lattices]
                uc_summary.append(uc_info)

        else:
            # Generate average unit cell
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL AVERAGING (no clustering) '))
            uc_a, uc_b, uc_c, uc_alpha, uc_beta, uc_gamma, uc_sg = \
                list(zip(*self.info.cluster_iterable))
            cons_pg = Counter(uc_sg).most_common(1)[0][0]
            all_pgs = Counter(uc_sg).most_common()
            unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                         np.median(uc_alpha), np.median(uc_beta),
                         np.median(uc_gamma))

            # Populate clustering info for GUI display
            uc_init = uctbx.unit_cell(unit_cell)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol='P1')
            groups = metric_subgroups(input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_sg = str(groups.lattice_group_info()).split('(')[0]
            best_uc = top_group['best_subsym'].unit_cell().parameters()
            uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "".format(best_uc[0], best_uc[1], best_uc[2],
                                    best_uc[3], best_uc[4], best_uc[5])
            cluster_info = {'number': len(self.info.cluster_iterable),
                            'pg': best_sg,
                            'uc': uc_no_stdev,
                            'filename': None}
            self.info.clusters.append(cluster_info)

            uc_table.append(uc_no_stdev)
            lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                  for i in all_pgs])
            uc_info = [len(self.info.cluster_iterable), best_sg, best_uc,
                       None, uc_no_stdev, lattices]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')

        # Select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                        ''.format(uc_pick[5]))
        self.info.best_pg = str(uc_pick[1])
        self.info.best_uc = uc_pick[2]

        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            util.main_log(self.info.logfile, item, False)
        self.info.update(uc_table=uc_table)

    if self.gui_mode:
        return self.info.clusters
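# A minimal, self-contained sketch of the clustering step performed by
# unit_cell_analysis() above. It demonstrates the same ab_cluster() call on
# synthetic unit cells built with Cluster.from_iterable instead of integration
# pickles; the 7-tuples (a, b, c, alpha, beta, gamma, space group) and the
# threshold value here are illustrative assumptions, not pipeline values.
def _example_ab_clustering():
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info

    # Two tight groups of cells plus one outlier (illustrative values)
    cells = Cluster.from_iterable([
        (78.0, 78.0, 37.0, 90, 90, 90, "P43212"),
        (78.2, 78.2, 37.1, 90, 90, 90, "P43212"),
        (68.0, 68.0, 108.0, 90, 90, 90, "P43212"),
    ])

    # Same call signature as in unit_cell_analysis(): an Andrews-Bernstein
    # distance threshold, no plotting, no file lists
    clusters, _ = cells.ab_cluster(threshold=5000,
                                   log=False,
                                   write_file_lists=False,
                                   schnell=False,
                                   doplot=False)

    # unit_cell_info() formats per-cluster medians for logging
    print(unit_cell_info(clusters))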
def run(args):
    import libtbx
    from libtbx import easy_pickle
    from dials.util import log
    from dials.util.options import OptionParser

    parser = OptionParser(
        # usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        # epilog=help_message
    )

    params, options, args = parser.parse_args(show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity,
               info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    if params.save_plot and not params.animate:
        import matplotlib
        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        matplotlib.use('Agg')  # use a non-interactive backend

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        if len(experiments) > len(reflections):
            flattened_reflections = []
            for refl in reflections:
                for i in range(0, flex.max(refl['id']) + 1):
                    sel = refl['id'] == i
                    flattened_reflections.append(refl.select(sel))
            reflections = flattened_reflections

        assert len(experiments) == len(reflections)

        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())

            # Profile-fitted intensities are deliberately disabled for now
            if 0 and 'intensity.prf.value' in refl:
                sel = refl.get_flags(refl.flags.integrated_prf)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.prf.value']
                variances = refl['intensity.prf.variance']
            else:
                assert 'intensity.sum.value' in refl
                sel = refl.get_flags(refl.flags.integrated_sum)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.sum.value']
                variances = refl['intensity.sum.variance']

            # FIXME probably need to do some filtering of intensities similar
            # to that done in export_mtz
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry, miller_indices,
                                    anomalous_flag=False)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(miller.array_info(source='DIALS',
                                                   source_type='pickle'))
            datasets_input.append(intensities)

    files = args
    for file_name in files:
        try:
            data = easy_pickle.load(file_name)
            intensities = data['observations'][0]
            intensities.set_info(miller.array_info(source=file_name,
                                                   source_type='pickle'))
            intensities = intensities.customized_copy(
                anomalous_flag=False).set_info(intensities.info())
            batches = None
        except Exception:
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'

            as_miller_arrays = reader.as_miller_arrays(
                merge_equivalents=False)
            intensities = [ma for ma in as_miller_arrays
                           if ma.info().labels == ['I', 'SIGI']][0]
            batches = [ma for ma in as_miller_arrays
                       if ma.info().labels == ['BATCH']]
            if len(batches):
                batches = batches[0]
            else:
                batches = None
            mtz_object = reader.file_content()
            intensities = intensities.customized_copy(
                anomalous_flag=False,
                indices=mtz_object.extract_original_index_miller_indices()
            ).set_info(intensities.info())

        intensities.set_observation_type_xray_intensity()
        datasets_input.append(intensities)

    if len(datasets_input) == 0:
        raise Sorry('No valid reflection files provided on command line')

    datasets = []
    for intensities in datasets_input:

        if params.batch is not None:
            # Note: batches refers to the most recently loaded file, so batch
            # selection is only meaningful for a single input MTZ file
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        if params.min_i_mean_over_sigma_mean is not None and (
                params.d_min is libtbx.Auto or params.d_min is not None):
            from xia2.Modules import Resolutionizer
            rparams = Resolutionizer.phil_defaults.extract().resolutionizer
            rparams.nbins = 20
            resolutionizer = Resolutionizer.resolutionizer(intensities, None,
                                                           rparams)
            i_mean_over_sigma_mean = 4
            d_min = resolutionizer.resolution_i_mean_over_sigma_mean(
                i_mean_over_sigma_mean)
            if params.d_min is libtbx.Auto:
                intensities = intensities.resolution_filter(
                    d_min=d_min).set_info(intensities.info())
                if params.verbose:
                    logger.info('Selecting reflections with d > %.2f' % d_min)
            elif d_min > params.d_min:
                logger.info('Rejecting dataset %s as d_min too low (%.2f)'
                            % (file_name, d_min))
                continue
            else:
                logger.info('Estimated d_min for %s: %.2f'
                            % (file_name, d_min))
        elif params.d_min not in (None, libtbx.Auto):
            intensities = intensities.resolution_filter(
                d_min=params.d_min).set_info(intensities.info())

        if params.normalisation == 'kernel':
            from mmtbx.scaling import absolute_scaling
            normalisation = absolute_scaling.kernel_normalisation(
                intensities, auto_kernel=True)
            intensities = normalisation.normalised_miller.deep_copy()

        cb_op_to_primitive = \
            intensities.change_of_basis_op_to_primitive_setting()
        intensities = intensities.change_basis(cb_op_to_primitive)

        if params.mode == 'full' or params.space_group is not None:
            if params.space_group is not None:
                space_group_info = params.space_group.primitive_setting()
                if not space_group_info.group().is_compatible_unit_cell(
                        intensities.unit_cell()):
                    logger.info(
                        'Skipping data set - incompatible space group and '
                        'unit cell: %s, %s'
                        % (space_group_info, intensities.unit_cell()))
                    continue
            else:
                space_group_info = sgtbx.space_group_info('P1')
            intensities = intensities.customized_copy(
                space_group_info=space_group_info)

        datasets.append(intensities)

    crystal_symmetries = [d.crystal_symmetry().niggli_cell()
                          for d in datasets]
    lattice_ids = list(range(len(datasets)))
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    if params.save_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure("Andrews-Bernstein distance dendrogram",
                         figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(params.unit_cell_clustering.threshold,
                                 log=params.unit_cell_clustering.log,
                                 write_file_lists=False,
                                 schnell=False,
                                 doplot=params.save_plot,
                                 ax=ax)
    if params.save_plot:
        plt.tight_layout()
        plt.savefig('%scluster_unit_cell.png' % params.plot_prefix)
        plt.close(fig)
    logger.info(unit_cell_info(clusters))

    # Keep only the largest cluster for the cosym analysis
    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if (largest_cluster_lattice_ids is None
                or len(cluster_lattice_ids)
                > len(largest_cluster_lattice_ids)):
            largest_cluster_lattice_ids = cluster_lattice_ids

    dataset_selection = largest_cluster_lattice_ids
    if len(dataset_selection) < len(datasets):
        logger.info('Selecting subset of data for cosym analysis: %s'
                    % str(dataset_selection))
        datasets = [datasets[i] for i in dataset_selection]

    # Per-dataset change-of-basis operator to ensure all are consistent
    change_of_basis_ops = []
    for i, dataset in enumerate(datasets):
        subgroups = sgtbx.lattice_symmetry.metric_subgroups(dataset,
                                                            max_delta=5)
        subgroup = subgroups.result_groups[0]
        cb_op_inp_best = subgroup['cb_op_inp_best']
        datasets[i] = dataset.change_basis(cb_op_inp_best)
        change_of_basis_ops.append(cb_op_inp_best)

    cb_op_ref_min = datasets[0].change_of_basis_op_to_niggli_cell()
    for i, dataset in enumerate(datasets):
        if params.space_group is None:
            datasets[i] = dataset.change_basis(cb_op_ref_min).customized_copy(
                space_group_info=sgtbx.space_group_info('P1'))
        else:
            datasets[i] = dataset.change_basis(cb_op_ref_min)
            datasets[i] = datasets[i].customized_copy(
                crystal_symmetry=crystal.symmetry(
                    unit_cell=datasets[i].unit_cell(),
                    space_group_info=params.space_group.primitive_setting(),
                    assert_is_compatible_unit_cell=False))
        datasets[i] = datasets[i].merge_equivalents().array()
        change_of_basis_ops[i] = cb_op_ref_min * change_of_basis_ops[i]

    result = analyse_datasets(datasets, params)

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in result.reindexing_ops:
        if 0 in result.reindexing_ops[dataset_id]:
            cb_op = result.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, [])
            reindexing_ops[cb_op].append(dataset_id)
        if dataset_id in result.space_groups:
            space_groups.setdefault(result.space_groups[dataset_id], [])
            space_groups[result.space_groups[dataset_id]].append(dataset_id)

    logger.info('Space groups:')
    for sg, dataset_ids in space_groups.items():
        logger.info(str(sg.info().reference_setting()))
        logger.info(dataset_ids)

    logger.info('Reindexing operators:')
    for cb_op, dataset_ids in reindexing_ops.items():
        logger.info(cb_op)
        logger.info(dataset_ids)

    if (len(experiments) and len(reflections)
            and params.output.reflections is not None
            and params.output.experiments is not None):
        import copy
        from dxtbx.model import ExperimentList
        from dxtbx.serialize import dump
        reindexed_experiments = ExperimentList()
        reindexed_reflections = flex.reflection_table()
        expt_id = 0
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                expt = experiments[dataset_selection[dataset_id]]
                refl = reflections[dataset_selection[dataset_id]]
                reindexed_expt = copy.deepcopy(expt)
                refl_reindexed = copy.deepcopy(refl)
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                reindexed_expt.crystal = reindexed_expt.crystal.change_basis(
                    cb_op_this)
                refl_reindexed['miller_index'] = cb_op_this.apply(
                    refl_reindexed['miller_index'])
                reindexed_experiments.append(reindexed_expt)
                refl_reindexed['id'] = flex.int(refl_reindexed.size(),
                                                expt_id)
                reindexed_reflections.extend(refl_reindexed)
                expt_id += 1
        logger.info('Saving reindexed experiments to %s'
                    % params.output.experiments)
        dump.experiment_list(reindexed_experiments,
                             params.output.experiments)
        logger.info('Saving reindexed reflections to %s'
                    % params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                file_name = files[dataset_selection[dataset_id]]
                basename = os.path.basename(file_name)
                out_name = (os.path.splitext(basename)[0]
                            + params.output.suffix + '_'
                            + str(dataset_selection[dataset_id]) + ".mtz")
                reader = any_reflection_file(file_name)
                assert reader.file_type() == 'ccp4_mtz'
                mtz_object = reader.file_content()
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                if not cb_op_this.is_identity_op():
                    logger.info('reindexing %s (%s)'
                                % (file_name, cb_op_this.as_xyz()))
                    mtz_object.change_basis_in_place(cb_op_this)
                mtz_object.write(out_name)
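# A standalone sketch of how the composed change-of-basis operator used in the
# reindexing loops above acts on Miller indices. The two operators chosen here
# are arbitrary illustrative examples; only sgtbx.change_of_basis_op and
# cctbx.array_family.flex are assumed, both already used in run() above.
def _example_compose_reindexing_op():
    from cctbx import sgtbx
    from cctbx.array_family import flex

    # A per-dataset operator (analogous to cb_op_inp_best) followed by a
    # consensus operator from the cosym analysis, composed in the same order
    # as cb_op_this = cb_op * change_of_basis_ops[dataset_id] above
    cb_op_dataset = sgtbx.change_of_basis_op('y,z,x')   # illustrative
    cb_op_cosym = sgtbx.change_of_basis_op('-x,-y,z')   # illustrative
    cb_op_this = cb_op_cosym * cb_op_dataset

    # Apply the composed operator to a few Miller indices, exactly as done
    # for refl_reindexed['miller_index'] in run()
    miller_indices = flex.miller_index([(1, 2, 3), (0, 0, 4)])
    reindexed = cb_op_this.apply(miller_indices)
    print(cb_op_this.as_xyz(), list(reindexed))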