Example #1
def test_make_cluster_plots():
    from xfel.clustering.cluster import Cluster
    # Import location assumed: make_cluster_plots is the dials helper that
    # builds plotly-style plot dictionaries from clusters.
    from dials.algorithms.clustering.plots import make_cluster_plots

    c1 = Cluster.from_iterable([
        (10.0, 10.0, 10.0, 90, 90, 90, "P1"),
        (10.1, 10.1, 10.1, 90, 90, 90, "P1"),
        (10.2, 10.2, 10.2, 90, 90, 90, "P1"),
    ])
    c2 = Cluster.from_iterable([
        (11.0, 11.0, 11.0, 90, 90, 90, "P1"),
        (11.1, 11.1, 11.1, 90, 90, 90, "P1"),
        (11.2, 11.2, 11.2, 90, 90, 90, "P1"),
        (11.3, 11.3, 11.3, 90, 90, 90, "P1"),
    ])
    clusters = [c1, c2]
    plots = make_cluster_plots(clusters)
    assert "uc_scatter_0" in plots
    assert "uc_scatter_1" in plots
    assert "uc_hist_0" in plots
    assert "uc_hist_1" in plots
    print(plots)
    assert len(plots["uc_hist_0"]["data"]) == 3
    assert len(plots["uc_hist_0"]["data"][0]["x"]) == 3
    assert len(plots["uc_hist_1"]["data"][0]["x"]) == 4
    assert len(plots["uc_scatter_0"]["data"]) == 3
    assert len(plots["uc_scatter_0"]["data"][0]["x"]) == 3
    assert len(plots["uc_scatter_1"]["data"][0]["x"]) == 4
Example #2
def run(_args):
    if not _args.dirs:
        raise IOError("Must provide location(s) of pickles")
    if _args.paths:
        ucs = Cluster.from_files(raw_input=_args.dirs,
                                 n_images=_args.n,
                                 dials=_args.dials)
    else:
        ucs = Cluster.from_directories(_args.dirs,
                                       n_images=_args.n,
                                       dials=_args.dials)

    if not _args.noplot:
        # This branch produces no figure, so plotting is explicitly
        # disabled (assumes --noplot is a store_false flag, i.e.
        # _args.noplot is False when the user asked for no plot).
        clusters, _ = ucs.ab_cluster(_args.t,
                                     log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=False)
        print(unit_cell_info(clusters))
    else:
        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(_args.t,
                                                log=_args.log,
                                                ax=ax,
                                                write_file_lists=_args.nofiles,
                                                schnell=_args.schnell,
                                                doplot=True)
        print(unit_cell_info(clusters))
        plt.tight_layout()
        plt.show()
Example #3
  def __init__(self, vertices, min_common_reflections=10):
    """
    Extends the constructor from cluster.Cluster to describe the cluster as a
    graph.

    :param min_common_reflections: number of reflections two images must have in
    common for an edge to be created.
    """
    Cluster.__init__(self, vertices, "Graph cluster", "made as a graph")
    self.common_miller_threshold = min_common_reflections
    self.edges = self._make_edges()
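The docstring implies an edge is created between two images when they share at least min_common_reflections Miller indices. The _make_edges method itself is not shown; below is a minimal sketch of what it could look like, assuming each member exposes its observed indices via a miller_array attribute (an assumption about the member API, not the real implementation):

  def _make_edges(self):
    # Sketch only: pair up members that share enough Miller indices.
    # The miller_array attribute and its .indices() accessor are assumed.
    edges = []
    for i, v1 in enumerate(self.members):
      set1 = set(v1.miller_array.indices())
      for v2 in self.members[i + 1:]:
        common = set1 & set(v2.miller_array.indices())
        if len(common) >= self.common_miller_threshold:
          edges.append((v1, v2, len(common)))
    return edges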
Example #5
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")

    ucs = Cluster.from_directories(_args.folders, "cluster_42")
    logging.info("Data imported.")

    #  Set up mega-plot
    plt.figure(figsize=(22, 15))
    gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])
    orr_axes = [plt.subplot(gs[0, 0]), plt.subplot(gs[0, 1]), plt.subplot(gs[0, 2])]
    inten_axes = [plt.subplot(gs[1, 0]), plt.subplot(gs[1, 1]), plt.subplot(gs[1, 2])]
    clust_ax = plt.subplot(gs[2, :])

    orr_axes = ucs.visualise_orientational_distribution(orr_axes, cbar=True)
    inten_axes = ucs.intensity_statistics(inten_axes)
    clusters, cluster_ax = ucs.ab_cluster(
        _args.t, log=_args.log, ax=clust_ax, schnell=_args.fast, write_file_lists=False
    )

    # plt.text("cluster.42 Plot Everything!")
    plt.tight_layout()

    print(unit_cell_info(clusters))
    plt.show()
Example #7
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    ucs = Cluster.from_directories(_args.folders,
                                   "cxi_targt_uc",
                                   n_images=_args.n)

    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t,
                                     log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=False)
    else:
        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(_args.t,
                                                log=_args.log,
                                                ax=ax,
                                                write_file_lists=_args.nofiles,
                                                schnell=_args.schnell,
                                                doplot=True)
        plt.tight_layout()
        plt.show()

    print(unit_cell_info(clusters))
Example #8
    def __init__(self,
                 experiments,
                 reflections,
                 dendrogram=False,
                 threshold=1000,
                 n_max=None):
        try:
            from xfel.clustering.cluster import Cluster
            from xfel.clustering.cluster_groups import unit_cell_info
        except ImportError:
            raise Sorry("clustering is not configured")
        import matplotlib.pyplot as plt

        ucs = Cluster.from_expts(refl_table=reflections,
                                 expts_list=experiments,
                                 n_images=n_max)
        self.clusters, _ = ucs.ab_cluster(
            threshold=threshold,
            log=True,  # log scale
            ax=plt.gca() if dendrogram else None,
            write_file_lists=False,
            schnell=False,
            doplot=dendrogram,
        )
        print(unit_cell_info(self.clusters))
        self.clustered_frames = {
            int(c.cname.split("_")[1]): c.members
            for c in self.clusters
        }
        if dendrogram:
            plt.tight_layout()
            plt.show()
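A usage sketch; the enclosing class name is not visible in the snippet, so UnitCellCluster below is a hypothetical stand-in for it:

# Hypothetical class name; experiments/reflections come from loaded data.
clusterer = UnitCellCluster(experiments,
                            reflections,
                            dendrogram=False,
                            threshold=5000,
                            n_max=None)
for cluster_id, frames in clusterer.clustered_frames.items():
    print(cluster_id, len(frames))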
Example #9
  def __init__(self, vertices, min_common_reflections=10):
    """
    Extends the constructor from cluster.Cluster to describe the cluster as a
    graph.

    :param min_common_reflections: number of reflections two images must have in
    common for an edge to be created.
    """
    # cleanup old edges, if graph was made from a previous graph.
    for v in vertices:
      v.edges = []

    Cluster.__init__(self, vertices, "Graph cluster", "made as a graph")
    self.common_miller_threshold = min_common_reflections
    self.edges = self._make_edges()
    self.vert_dict = {v: i for i, v in enumerate(self.members)}
Example #10
def do_cluster_analysis(crystal_symmetries, params):
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)

    if params.plot.show or params.plot.name is not None:
        if not params.plot.show:
            import matplotlib

            # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use("Agg")  # use a non-interactive backend
        import matplotlib.pyplot as plt

        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(
            params.threshold,
            log=params.plot.log,
            ax=ax,
            write_file_lists=False,
            doplot=True,
        )
        print(unit_cell_info(clusters))
        plt.tight_layout()
        if params.plot.name is not None:
            plt.savefig(params.plot.name)
        if params.plot.show:
            plt.show()

    else:
        clusters, cluster_axes = ucs.ab_cluster(params.threshold,
                                                log=params.plot.log,
                                                write_file_lists=False,
                                                doplot=False)
        print(unit_cell_info(clusters))

    return clusters
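A usage sketch, assuming a params object carrying the attributes the function reads; SimpleNamespace stands in here for the real PHIL-generated params:

from types import SimpleNamespace

# Hypothetical params mirroring the attributes used above
# (params.threshold, params.plot.show/name/log).
params = SimpleNamespace(
    threshold=5000,
    plot=SimpleNamespace(show=False, name="dendrogram.png", log=True),
)
clusters = do_cluster_analysis(crystal_symmetries, params)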
Example #11
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    import logging
    from xfel.clustering.cluster import Cluster
    FORMAT = '%(levelname)s %(module)s.%(funcName)s: %(message)s'
    logging.basicConfig(level=logging.WARNING, format=FORMAT)

    cluster = Cluster.from_directories(_args.folders,
                                       'Command line visualisation')
    logging.info("data imported")
    cluster.visualise_orientational_distribution()
Example #13
def run(_args):
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")

    ucs = Cluster.from_directories(_args.folders, "Per-frame-Wilson")
    logging.info("Data imported.")

    fig = plt.figure(figsize=(10, 10))
    ax = plt.gca()
    ucs.members[0].plot_wilson(ax=ax)

    browser = Key_event(ax, ucs.members, fig)

    fig.canvas.mpl_connect('key_press_event', browser.key_event)
    plt.show()
Example #15
def do_cluster_analysis(crystal_symmetries, params):

    try:
        from xfel.clustering.cluster import Cluster
        from xfel.clustering.cluster_groups import unit_cell_info
    except ImportError:
        raise Sorry(
            "cluster_unit_cell requires the xfel module, which is not available")

    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)

    if params.plot.show or params.plot.name is not None:
        if not params.plot.show:
            import matplotlib

            # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use("Agg")  # use a non-interactive backend
        import matplotlib.pyplot as plt

        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(
            params.threshold,
            log=params.plot.log,
            ax=ax,
            write_file_lists=False,
            # schnell=_args.schnell,
            doplot=True,
        )
        print(unit_cell_info(clusters))
        plt.tight_layout()
        if params.plot.name is not None:
            plt.savefig(params.plot.name)
        if params.plot.show:
            plt.show()

    else:
        clusters, cluster_axes = ucs.ab_cluster(
            params.threshold,
            log=params.plot.log,
            write_file_lists=False,
            # schnell=_args.schnell,
            doplot=False,
        )
        print(unit_cell_info(clusters))

    return clusters
Example #16
    def unit_cell_clustering(self, plot_name=None):
        crystal_symmetries = []
        for expt in self._data_manager.experiments:
            crystal_symmetry = expt.crystal.get_crystal_symmetry(
                assert_is_compatible_unit_cell=False)
            crystal_symmetries.append(crystal_symmetry.niggli_cell())
        lattice_ids = [
            expt.identifier for expt in self._data_manager.experiments
        ]
        from xfel.clustering.cluster import Cluster
        from xfel.clustering.cluster_groups import unit_cell_info
        ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                              lattice_ids=lattice_ids)
        if plot_name is not None:
            from matplotlib import pyplot as plt
            plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
            ax = plt.gca()
        else:
            ax = None
        clusters, _ = ucs.ab_cluster(
            self._params.unit_cell_clustering.threshold,
            log=self._params.unit_cell_clustering.log,
            write_file_lists=False,
            schnell=False,
            doplot=(plot_name is not None),
            ax=ax)
        if plot_name is not None:
            plt.tight_layout()
            plt.savefig(plot_name)
            plt.clf()
        logger.info(unit_cell_info(clusters))
        largest_cluster_lattice_ids = None
        for cluster in clusters:
            cluster_lattice_ids = [m.lattice_id for m in cluster.members]
            if largest_cluster_lattice_ids is None:
                largest_cluster_lattice_ids = cluster_lattice_ids
            elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
                largest_cluster_lattice_ids = cluster_lattice_ids

        if len(largest_cluster_lattice_ids) < len(crystal_symmetries):
            logger.info(
                'Selecting subset of data sets for subsequent analysis: %s' %
                str(largest_cluster_lattice_ids))
            self._data_manager.select(largest_cluster_lattice_ids)
        else:
            logger.info('Using all data sets for subsequent analysis')
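The largest-cluster selection loop above can be written more compactly; an equivalent sketch (assuming at least one cluster was returned):

# Equivalent to the selection loop: take the lattice ids of the cluster
# with the most members (assumes clusters is non-empty).
largest_cluster_lattice_ids = max(
    ([m.lattice_id for m in c.members] for c in clusters),
    key=len,
)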
Example #17
def run(_args):
  if not _args.folders:
    raise IOError("Must give at least one path to folder of pickles")

  ucs = Cluster.from_directories(_args.folders, "cluster_intensity_stats")
  logging.info("Data imported.")
  plt.figure(figsize=(20,10))
  gs = gridspec.GridSpec(3, 2, width_ratios=[1, 3])
  inten_axes = [plt.subplot(gs[0,0]),
                plt.subplot(gs[1,0]),
                plt.subplot(gs[2,0])]
  big_axes = plt.subplot(gs[:,1])

  ucs.intensity_statistics(ax=inten_axes)
  ucs.all_frames_intensity_stats(ax=big_axes)
  plt.tight_layout()
  plt.show()
Example #19
    def run(self, iterable):

        # with Capturing() as junk_output:
        errors = []
        try:
            ucs = Cluster.from_iterable(iterable=iterable)
            clusters, _ = ucs.ab_cluster(5000,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=True,
                                         doplot=False)
        except Exception as e:
            print("IOTA ERROR (CLUSTERING): ", e)
            clusters = []
            errors.append(str(e))

        info = []
        if clusters:
            for cluster in clusters:
                uc_init = unit_cell(cluster.medians)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol="P1")
                groups = lattice_symmetry.metric_subgroups(
                    input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_sg = str(groups.lattice_group_info()).split("(")[0]
                best_uc = top_group["best_subsym"].unit_cell().parameters()
                uc_no_stdev = ("{:<6.2f} {:<6.2f} {:<6.2f} "
                               "{:<6.2f} {:<6.2f} {:<6.2f} "
                               "".format(
                                   best_uc[0],
                                   best_uc[1],
                                   best_uc[2],
                                   best_uc[3],
                                   best_uc[4],
                                   best_uc[5],
                               ))
                cluster_info = {
                    "number": len(cluster.members),
                    "pg": str(best_sg),
                    "uc": uc_no_stdev,
                }
                info.append(cluster_info)

        return info, errors
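A sketch of consuming the return value; clusterer and uc_iterable are hypothetical names, and the dict keys follow the cluster_info built above:

# Hypothetical instance and input for the run() method shown above.
info, errors = clusterer.run(uc_iterable)
for entry in info:
    print("{number:>5} crystals  {pg:<10} {uc}".format(**entry))
for err in errors:
    print("clustering error:", err)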
Example #20
    def run(self, iterable):

        with Capturing() as junk_output:
            try:
                ucs = Cluster.from_iterable(iterable=iterable)
                clusters, _ = ucs.ab_cluster(5000,
                                             log=False,
                                             write_file_lists=False,
                                             schnell=True,
                                             doplot=False)
            except Exception:
                clusters = []

        if len(clusters) > 0:
            info = []
            for cluster in clusters:
                uc_init = unit_cell(cluster.medians)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = lattice_symmetry.metric_subgroups(
                    input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                best_sg = top_group['best_subsym'].space_group_info()

                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {
                    'number': len(cluster.members),
                    'pg': str(best_sg),
                    'uc': uc_no_stdev
                }

                info.append(cluster_info)

        else:
            info = None

        return info
Example #21
    def cluster_unit_cells(self):
        uc_input = []
        for item in self.spotfinding_info:
            if item[4] is not None:
                try:
                    info_line = [float(i) for i in item[4]]
                    info_line.append(item[3])
                    uc_input.append(info_line)
                except ValueError:
                    pass

        with misc.Capturing() as junk_output:
            try:
                ucs = Cluster.from_iterable(iterable=uc_input)
                clusters, _ = ucs.ab_cluster(5000,
                                             log=False,
                                             write_file_lists=False,
                                             schnell=True,
                                             doplot=False)
            except Exception:
                clusters = []
Example #22
def run(_args):
  if not _args.folders:
    raise IOError("Must give at least one path to folder of pickles")
  ucs = Cluster.from_directories(_args.folders, "cxi_targt_uc")

  if not _args.noplot:
    clusters, _ = ucs.ab_cluster(_args.t, log=_args.log,
                               write_file_lists=_args.nofiles,
                               schnell=_args.schnell,
                               doplot=False)
  else:
    plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
    ax = plt.gca()
    clusters, cluster_axes = ucs.ab_cluster(_args.t, log=_args.log, ax=ax,
                                            write_file_lists=_args.nofiles,
                                            schnell=_args.schnell,
                                            doplot=True)
    plt.tight_layout()
    plt.show()

  print(unit_cell_info(clusters))
Example #23
def report_on_crystal_clusters(crystal_symmetries, make_plots=True):
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)
    clusters, _ = ucs.ab_cluster(5000,
                                 log=None,
                                 write_file_lists=False,
                                 doplot=False)
    cluster_plots = {}
    min_cluster_pc = 5
    threshold = math.floor((min_cluster_pc / 100) * len(crystal_symmetries))
    large_clusters = [c for c in clusters if len(c.members) > threshold]
    large_clusters.sort(key=lambda x: len(x.members), reverse=True)

    if large_clusters:
        logger.info(f"""
Unit cell clustering analysis, clusters with >{min_cluster_pc}% of the number of crystals indexed
{unit_cell_info(large_clusters)}
""")
        if make_plots:
            cluster_plots = make_cluster_plots(large_clusters)
    else:
        logger.info(
            f"No clusters found with >{min_cluster_pc}% of the number of crystals."
        )
    return cluster_plots, large_clusters
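A usage sketch; the crystal_symmetries input can be built exactly as Example #27 below does, from indexed experiments (the experiments variable here is assumed):

from cctbx import crystal

# Hypothetical list of indexed experiments with crystal models.
crystal_symmetries = [
    crystal.symmetry(
        unit_cell=expt.crystal.get_unit_cell(),
        space_group=expt.crystal.get_space_group(),
    ) for expt in experiments
]
plots, big_clusters = report_on_crystal_clusters(crystal_symmetries,
                                                 make_plots=False)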
Example #24
    def unit_cell_analysis(self):
        """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin et al., Acta D, 2015) to split integration results according to
        detected morphological groupings (if any). Most useful with preliminary
        integration without a target unit cell specified. """

        # Will not run clustering if only one integration result found or if turned off
        if self.final_objects is None:
            self.cons_uc = None
            self.cons_pg = None
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!',
                          True)

        elif len(self.final_objects) == 1:
            unit_cell = (self.final_objects[0].final['a'],
                         self.final_objects[0].final['b'],
                         self.final_objects[0].final['c'],
                         self.final_objects[0].final['alpha'],
                         self.final_objects[0].final['beta'],
                         self.final_objects[0].final['gamma'])
            point_group = self.final_objects[0].final['sg']
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                      "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                            unit_cell[0], unit_cell[1], unit_cell[2],
                            unit_cell[3], unit_cell[4], unit_cell[5])
            misc.main_log(self.logfile, uc_line, True)

            self.cons_pg = point_group
            self.cons_uc = unit_cell

        else:
            uc_table = []
            uc_summary = []

            if self.params.analysis.run_clustering:
                # run hierarchical clustering analysis
                from xfel.clustering.cluster import Cluster
                counter = 0

                threshold = self.params.analysis.cluster_threshold
                cluster_limit = self.params.analysis.cluster_limit
                if self.params.analysis.cluster_n_images > 0:
                    n_images = self.params.analysis.cluster_n_images
                else:
                    n_images = len(self.final_objects)

                obj_list = []
                if n_images < len(self.final_objects):
                    import random
                    # Draw a unique random subset of the integrated objects.
                    obj_list = random.sample(self.final_objects, n_images)
                if not obj_list:
                    obj_list = self.final_objects

                # Cluster from iterable (this doesn't keep filenames - bad!)
                # with Capturing() as suppressed_output:
                #   uc_iterable = []
                #   for obj in obj_list:
                #     unit_cell = (float(obj.final['a']),
                #                  float(obj.final['b']),
                #                  float(obj.final['c']),
                #                  float(obj.final['alpha']),
                #                  float(obj.final['beta']),
                #                  float(obj.final['gamma']),
                #                  obj.final['sg'])
                #     uc_iterable.append(unit_cell)
                #   ucs = Cluster.from_iterable(iterable=uc_iterable)

                # Cluster from files (slow, but will keep for now)
                ucs = Cluster.from_files(pickle_list=self.pickles)

                # Do clustering
                clusters, _ = ucs.ab_cluster(threshold=threshold,
                                             log=False,
                                             write_file_lists=False,
                                             schnell=False,
                                             doplot=False)
                uc_table.append("\n\n{:-^80}\n"\
                                "".format(' UNIT CELL ANALYSIS '))

                # extract clustering info and add to summary output list
                if cluster_limit is None:
                    if len(self.pickles) // 10 >= 10:
                        cluster_limit = 10
                    else:
                        cluster_limit = len(self.pickles) // 10

                for cluster in clusters:
                    sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                            key=lambda x: -1 * x[1])
                    pg_nums = [pg[1] for pg in sorted_pg_comp]
                    cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                    if len(cluster.members) > cluster_limit:
                        counter += 1

                        # Sort clustered images by mosaicity, lowest to highest
                        cluster_filenames = [j.path for j in cluster.members]
                        clustered_objects = [i for i in self.final_objects if \
                                             i.final['final'] in cluster_filenames]
                        sorted_cluster = sorted(clustered_objects,
                                                key=lambda i: i.final['mos'])
                        # Write to file
                        if self.params.analysis.cluster_write_files:
                            output_file = os.path.join(
                                self.output_dir,
                                "uc_cluster_{}.lst".format(counter))
                            for obj in sorted_cluster:
                                with open(output_file, 'a') as scf:
                                    scf.write('{}\n'.format(
                                        obj.final['final']))

                            mark_output = os.path.basename(output_file)
                        else:
                            mark_output = '*'
                            output_file = None

                    else:
                        mark_output = ''
                        output_file = None

                    # Populate clustering info for GUI display
                    uc_init = uctbx.unit_cell(cluster.medians)
                    symmetry = crystal.symmetry(unit_cell=uc_init,
                                                space_group_symbol='P1')
                    groups = sgtbx.lattice_symmetry.\
                      metric_subgroups(input_symmetry=symmetry, max_delta=3)
                    top_group = groups.result_groups[0]
                    best_uc = top_group['best_subsym'].unit_cell().parameters()
                    best_sg = top_group['best_subsym'].space_group_info()

                    uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "".format(best_uc[0], best_uc[1], best_uc[2],
                                            best_uc[3], best_uc[4], best_uc[5])
                    cluster_info = {
                        'number': len(cluster.members),
                        'pg': best_sg,
                        'uc': uc_no_stdev,
                        'filename': mark_output
                    }
                    self.clusters.append(cluster_info)

                    # format and record output
                    # TODO: How to propagate stdevs after conversion from Niggli?
                    # uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                    #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                    #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                    #           "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                    #                                 cluster.medians[0], cluster.stdevs[0],
                    #                                 cluster.medians[1], cluster.stdevs[1],
                    #                                 cluster.medians[2], cluster.stdevs[2],
                    #                                 cluster.medians[3], cluster.stdevs[3],
                    #                                 cluster.medians[4], cluster.stdevs[4],
                    #                                 cluster.medians[5], cluster.stdevs[5],
                    #                                 mark_output)
                    # uc_table.append(uc_line)
                    uc_table.append("{:<6}:  {} {}".format(
                        len(cluster.members), uc_no_stdev, mark_output))
                    lattices = ', '.join(
                        ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                    # uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                    #            output_file, uc_line, lattices]
                    uc_info = [
                        len(cluster.members), best_sg, best_uc, output_file,
                        uc_no_stdev, lattices
                    ]
                    uc_summary.append(uc_info)

            else:

                # generate average unit cell
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL AVERAGING (no clustering) '))
                uc_a = [i.final['a'] for i in self.final_objects]
                uc_b = [i.final['b'] for i in self.final_objects]
                uc_c = [i.final['c'] for i in self.final_objects]
                uc_alpha = [i.final['alpha'] for i in self.final_objects]
                uc_beta = [i.final['beta'] for i in self.final_objects]
                uc_gamma = [i.final['gamma'] for i in self.final_objects]
                uc_sg = [i.final['sg'] for i in self.final_objects]
                cons_pg = Counter(uc_sg).most_common(1)[0][0]
                all_pgs = Counter(uc_sg).most_common()
                unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                             np.median(uc_alpha), np.median(uc_beta),
                             np.median(uc_gamma))

                # Populate clustering info for GUI display
                uc_init = uctbx.unit_cell(unit_cell)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = sgtbx.lattice_symmetry. \
                  metric_subgroups(input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                best_sg = top_group['best_subsym'].space_group_info()

                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {
                    'number': len(self.final_objects),
                    'pg': best_sg,
                    'uc': uc_no_stdev,
                    'filename': None
                }
                self.clusters.append(cluster_info)

                # uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   " \
                #           "{}".format('({})'.format(len(self.final_objects)), cons_pg,
                #                       np.median(uc_a), np.std(uc_a),
                #                       np.median(uc_b), np.std(uc_b),
                #                       np.median(uc_c), np.std(uc_c),
                #                       np.median(uc_alpha), np.std(uc_alpha),
                #                       np.median(uc_beta), np.std(uc_beta),
                #                       np.median(uc_gamma), np.std(uc_gamma), '')
                #
                # uc_table.append(uc_line)
                uc_table.append(uc_no_stdev)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
                # uc_info = [len(self.final_objects), cons_pg, unit_cell, None,
                #            uc_line, lattices]
                uc_info = [
                    len(self.final_objects), best_sg, best_uc, None,
                    uc_no_stdev, lattices
                ]
                uc_summary.append(uc_info)

            uc_table.append('\nMost common unit cell:\n')

            # select the most prevalent unit cell (most members in cluster)
            uc_freqs = [i[0] for i in uc_summary]
            uc_pick = uc_summary[np.argmax(uc_freqs)]
            uc_table.append(uc_pick[4])
            uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                            ''.format(uc_pick[5]))
            self.cons_pg = uc_pick[1]
            self.cons_uc = uc_pick[2]

            if uc_pick[3] is not None:
                self.prime_data_path = uc_pick[3]

            for item in uc_table:
                misc.main_log(self.logfile, item, (not self.gui_mode))

            self.analysis_result.clusters = self.clusters
            self.analysis_result.cons_pg = self.cons_pg
            self.analysis_result.cons_uc = self.cons_uc

            if self.gui_mode:
                return self.clusters
Example #25
    def unit_cell_analysis(self, write_files=True):
        """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin et al., Acta D, 2015) to split integration results according to
        detected morphological groupings (if any). Most useful with preliminary
        integration without a target unit cell specified. """

        # Will not run clustering if only one integration result found or if turned off
        if self.final_objects is None:
            self.cons_uc = None
            self.cons_pg = None
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!',
                          True)

        elif len(self.final_objects) == 1:
            unit_cell = (self.final_objects[0].final['a'],
                         self.final_objects[0].final['b'],
                         self.final_objects[0].final['c'],
                         self.final_objects[0].final['alpha'],
                         self.final_objects[0].final['beta'],
                         self.final_objects[0].final['gamma'])
            point_group = self.final_objects[0].final['sg']
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                      "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                            unit_cell[0], unit_cell[1], unit_cell[2],
                            unit_cell[3], unit_cell[4], unit_cell[5])
            misc.main_log(self.logfile, uc_line, True)

            self.cons_pg = point_group
            self.cons_uc = unit_cell

        else:
            uc_table = []
            uc_summary = []

            if self.params.analysis.run_clustering:
                # run hierarchical clustering analysis
                from xfel.clustering.cluster import Cluster

                counter = 0
                ucs = Cluster.from_files(pickle_list=self.pickles, use_b=True)
                clusters, _ = ucs.ab_cluster(
                    self.params.analysis.cluster_threshold,
                    log=False,
                    write_file_lists=False,
                    schnell=False,
                    doplot=False)
                uc_table.append("\n\n{:-^80}\n"\
                                "".format(' UNIT CELL ANALYSIS '))

                # extract clustering info and add to summary output list
                if len(self.pickles) // 10 >= 10:
                    cluster_limit = 10
                else:
                    cluster_limit = len(self.pickles) // 10

                for cluster in clusters:
                    sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                            key=lambda x: -1 * x[1])
                    pg_nums = [pg[1] for pg in sorted_pg_comp]
                    cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                    if len(cluster.members) > cluster_limit:
                        counter += 1

                        # Sort clustered images by mosaicity, lowest to highest
                        cluster_filenames = [j.path for j in cluster.members]
                        clustered_objects = [i for i in self.final_objects if \
                                             i.final['final'] in cluster_filenames]
                        sorted_cluster = sorted(clustered_objects,
                                                key=lambda i: i.final['mos'])
                        # Write to file
                        if write_files:
                            output_file = os.path.join(
                                self.output_dir,
                                "uc_cluster_{}.lst".format(counter))
                            for obj in sorted_cluster:
                                with open(output_file, 'a') as scf:
                                    scf.write('{}\n'.format(
                                        obj.final['final']))

                            mark_output = os.path.basename(output_file)
                        else:
                            mark_output = '*'
                            output_file = None

                        # Populate clustering info for GUI display
                        uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                      "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                      "".format(cluster.medians[0], cluster.medians[1],
                                                cluster.medians[2], cluster.medians[3],
                                                cluster.medians[4], cluster.medians[5])
                        cluster_info = {
                            'number': len(cluster.members),
                            'pg': cons_pg[0],
                            'uc': uc_no_stdev,
                            'filename': mark_output
                        }
                        self.clusters.append(cluster_info)

                    else:
                        mark_output = ''
                        output_file = None

                    # format and record output
                    uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                              "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                              "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                              "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                                                    cluster.medians[0], cluster.stdevs[0],
                                                    cluster.medians[1], cluster.stdevs[1],
                                                    cluster.medians[2], cluster.stdevs[2],
                                                    cluster.medians[3], cluster.stdevs[3],
                                                    cluster.medians[4], cluster.stdevs[4],
                                                    cluster.medians[5], cluster.stdevs[5],
                                                    mark_output)
                    uc_table.append(uc_line)
                    lattices = ', '.join(
                        ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                    uc_info = [
                        len(cluster.members), cons_pg[0], cluster.medians,
                        output_file, uc_line, lattices
                    ]
                    uc_summary.append(uc_info)

            else:

                # generate average unit cell
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL AVERAGING (no clustering) '))
                uc_a = [i.final['a'] for i in self.final_objects]
                uc_b = [i.final['b'] for i in self.final_objects]
                uc_c = [i.final['c'] for i in self.final_objects]
                uc_alpha = [i.final['alpha'] for i in self.final_objects]
                uc_beta = [i.final['beta'] for i in self.final_objects]
                uc_gamma = [i.final['gamma'] for i in self.final_objects]
                uc_sg = [i.final['sg'] for i in self.final_objects]
                cons_pg = Counter(uc_sg).most_common(1)[0][0]
                all_pgs = Counter(uc_sg).most_common()
                uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   " \
                          "{}".format('({})'.format(len(self.final_objects)), cons_pg,
                                      np.median(uc_a), np.std(uc_a),
                                      np.median(uc_b), np.std(uc_b),
                                      np.median(uc_c), np.std(uc_c),
                                      np.median(uc_alpha), np.std(uc_alpha),
                                      np.median(uc_beta), np.std(uc_beta),
                                      np.median(uc_gamma), np.std(uc_gamma), '')
                unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                             np.median(uc_alpha), np.median(uc_beta),
                             np.median(uc_gamma))
                uc_table.append(uc_line)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
                uc_info = [
                    len(self.final_objects), cons_pg, unit_cell, None, uc_line,
                    lattices
                ]
                uc_summary.append(uc_info)

            uc_table.append('\nMost common unit cell:\n')

            # select the most prevalent unit cell (most members in cluster)
            uc_freqs = [i[0] for i in uc_summary]
            uc_pick = uc_summary[np.argmax(uc_freqs)]
            uc_table.append(uc_pick[4])
            uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                            ''.format(uc_pick[5]))

            self.cons_pg = uc_pick[1]
            self.cons_uc = uc_pick[2]

            if uc_pick[3] is not None:
                self.prime_data_path = uc_pick[3]

            for item in uc_table:
                misc.main_log(self.logfile, item, (not self.gui_mode))

            if self.gui_mode:
                return self.cons_pg, self.cons_uc, self.clusters
Example #26
    def run(self):
        '''Execute the script.'''

        from dials.util.options import flatten_experiments
        from libtbx.utils import Sorry

        # Parse the command line
        params, options = self.parser.parse_args(show_diff_phil=True)

        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print "No Experiments found in the input"
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print "No reflection data found in the input"
            self.parser.print_help()
            return
        try:
            assert len(params.input.reflections) == len(
                params.input.experiments)
        except AssertionError:
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        flat_exps = flatten_experiments(params.input.experiments)

        ref_beam = params.reference_from_experiment.beam
        ref_goniometer = params.reference_from_experiment.goniometer
        ref_scan = params.reference_from_experiment.scan
        ref_crystal = params.reference_from_experiment.crystal
        ref_detector = params.reference_from_experiment.detector

        if ref_beam is not None:
            try:
                ref_beam = flat_exps[ref_beam].beam
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_beam))

        if ref_goniometer is not None:
            try:
                ref_goniometer = flat_exps[ref_goniometer].goniometer
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_goniometer))

        if ref_scan is not None:
            try:
                ref_scan = flat_exps[ref_scan].scan
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_scan))

        if ref_crystal is not None:
            try:
                ref_crystal = flat_exps[ref_crystal].crystal
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_crystal))

        if ref_detector is not None:
            assert not params.reference_from_experiment.average_detector
            try:
                ref_detector = flat_exps[ref_detector].detector
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_detector))
        elif params.reference_from_experiment.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                # Recursive function to do the averaging

                if params.reference_from_experiment.average_hierarchy_level is None or \
                    depth == params.reference_from_experiment.average_hierarchy_level:
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthogonalize the slow and the fast vectors by rotating around the cross product
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(beam=ref_beam,
                                       goniometer=ref_goniometer,
                                       scan=ref_scan,
                                       crystal=ref_crystal,
                                       detector=ref_detector,
                                       params=params)

        # set up global experiments and reflections lists
        from dials.array_family import flex
        reflections = flex.reflection_table()
        global_id = 0
        from dxtbx.model.experiment_list import ExperimentList
        experiments = ExperimentList()

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                sel = refs['id'] == i
                sub_ref = refs.select(sel)
                nrefs_per_exp.append(len(sub_ref))
                sub_ref['id'] = flex.int(len(sub_ref), global_id)
                if params.output.delete_shoeboxes and 'shoebox' in sub_ref:
                    del sub_ref['shoebox']
                reflections.extend(sub_ref)
                experiments.append(combine(exp))
                global_id += 1

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table
        header = ["Experiment", "Nref"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print(st.format())

        # save a random subset if requested
        if params.output.n_subset is not None and len(
                experiments) > params.output.n_subset:
            import random
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()
            n_picked = 0
            indices = list(range(len(experiments)))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections['id'] == idx)
                refls['id'] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            experiments = subset_exp
            reflections = subset_refls

        def save_output(experiments, reflections, exp_name, refl_name):
            # save output
            from dxtbx.model.experiment_list import ExperimentListDumper
            print('Saving combined experiments to {0}'.format(exp_name))
            dump = ExperimentListDumper(experiments)
            dump.as_json(exp_name)
            print('Saving combined reflections to {0}'.format(refl_name))
            reflections.as_pickle(refl_name)

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            from dxtbx.command_line.image_average import splitit
            import os
            for i, indices in enumerate(
                    splitit(range(len(experiments)),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections['id'] == sub_idx)
                    sub_refls['id'] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.json" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.pickle" % i
                save_output(batch_expts, batch_refls, exp_filename,
                            ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            import os
            result = []
            for cluster in range(len(experiments_l)):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i in range(len(experiments_l[cluster])):
                    refls = reflections_l[cluster][i]
                    expts = experiments_l[cluster][i]
                    refls['id'] = flex.int(len(refls), i)
                    cluster_expts.append(expts)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.json" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.pickle" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(experiments,
                                reflections,
                                dendrogram=params.clustering.dendrogram,
                                threshold=params.clustering.threshold,
                                n_max=params.clustering.max_crystals)
            n_clusters = len(clustered.clustered_frames)
            if params.clustering.max_clusters is not None:
                not_too_many = lambda keeps: len(
                    keeps) < params.clustering.max_clusters
            else:
                not_too_many = lambda keeps: True
            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments, clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename, n_clusters)
            for savable_tuple in list_of_combined:
                if params.output.max_batch_size is None:
                    save_output(*savable_tuple)
                else:
                    save_in_batches(*savable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                save_output(experiments, reflections,
                            params.output.experiments_filename,
                            params.output.reflections_filename)
            else:
                save_in_batches(experiments,
                                reflections,
                                params.output.experiments_filename,
                                params.output.reflections_filename,
                                batch_size=params.output.max_batch_size)
        return
Example #27
def run(args: List[str] = None, phil: phil.scope = phil_scope) -> None:
    parser = OptionParser(
        usage="",
        read_experiments=True,
        read_reflections=True,
        phil=phil,
        check_format=False,
        epilog=__doc__,
    )
    params, _ = parser.parse_args(args=args, show_diff_phil=False)

    if not params.input.experiments or not params.input.reflections:
        parser.print_help()
        sys.exit()

    reflections, experiments = reflections_and_experiments_from_files(
        params.input.reflections, params.input.experiments)
    log.config(verbosity=1, logfile=params.output.log)
    logger.info(dials_version())

    diff_phil = parser.diff_phil.as_str()
    if diff_phil:
        logger.info("The following parameters have been modified:\n%s",
                    diff_phil)

    st = time.time()
    indexed_experiments, indexed_reflections, summary_data = index(
        experiments, reflections[0], params)

    # print some clustering information
    ucs = Cluster.from_crystal_symmetries([
        crystal.symmetry(
            unit_cell=expt.crystal.get_unit_cell(),
            space_group=expt.crystal.get_space_group(),
        ) for expt in indexed_experiments
    ])
    clusters, _ = ucs.ab_cluster(5000,
                                 log=None,
                                 write_file_lists=False,
                                 doplot=False)
    large_clusters = []
    cluster_plots = {}
    threshold = math.floor(0.05 * len(indexed_experiments))
    for cluster in clusters:
        if len(cluster.members) > threshold:
            large_clusters.append(cluster)
    large_clusters.sort(key=lambda x: len(x.members), reverse=True)

    if large_clusters:
        logger.info(f"""
Unit cell clustering analysis, clusters with >5% of the number of crystals indexed
""" + unit_cell_info(large_clusters))
        if params.output.html or params.output.json:
            cluster_plots = make_cluster_plots(large_clusters)
    else:
        logger.info(f"No clusters found with >5% of the number of crystals.")

    logger.info(f"Saving indexed experiments to {params.output.experiments}")
    indexed_experiments.as_file(params.output.experiments)
    logger.info(f"Saving indexed reflections to {params.output.reflections}")
    indexed_reflections.as_file(params.output.reflections)

    if params.output.html or params.output.json:
        summary_plots = generate_plots(summary_data)
        if cluster_plots:
            summary_plots.update(cluster_plots)
        if params.output.html:
            generate_html_report(summary_plots, params.output.html)
        if params.output.json:
            with open(params.output.json, "w") as outfile:
                json.dump(summary_plots, outfile)

    logger.info(f"Total time: {time.time() - st:.2f}s")
Example #28
    def run_with_preparsed(self, params, options):
        """Run combine_experiments, but allow passing in of parameters"""
        from dials.util.options import flatten_experiments

        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print("No reflection data found in the input")
            self.parser.print_help()
            return
        if len(params.input.reflections) != len(params.input.experiments):
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        flat_exps = flatten_experiments(params.input.experiments)

        ref_beam = params.reference_from_experiment.beam
        ref_goniometer = params.reference_from_experiment.goniometer
        ref_scan = params.reference_from_experiment.scan
        ref_crystal = params.reference_from_experiment.crystal
        ref_detector = params.reference_from_experiment.detector

        if ref_beam is not None:
            try:
                ref_beam = flat_exps[ref_beam].beam
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(ref_beam))

        if ref_goniometer is not None:
            try:
                ref_goniometer = flat_exps[ref_goniometer].goniometer
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_goniometer))

        if ref_scan is not None:
            try:
                ref_scan = flat_exps[ref_scan].scan
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(ref_scan))

        if ref_crystal is not None:
            try:
                ref_crystal = flat_exps[ref_crystal].crystal
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_crystal))

        if ref_detector is not None:
            assert not params.reference_from_experiment.average_detector
            try:
                ref_detector = flat_exps[ref_detector].detector
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_detector))
        elif params.reference_from_experiment.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                # Recursive function to do the averaging

                if (params.reference_from_experiment.average_hierarchy_level is
                        None or depth == params.reference_from_experiment.
                        average_hierarchy_level):
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthogonalize the slow and the fast vectors by rotating around the cross product
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)
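                    # Rotating each axis by +/-(a - 45) degrees about the
                    # cross product restores the fast/slow angle to exactly
                    # 90 degrees while preserving their mean orientation.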

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(
            beam=ref_beam,
            goniometer=ref_goniometer,
            scan=ref_scan,
            crystal=ref_crystal,
            detector=ref_detector,
            params=params,
        )

        # set up global experiments and reflections lists
        from dials.array_family import flex

        reflections = flex.reflection_table()
        global_id = 0
        skipped_expts = 0
        from dxtbx.model.experiment_list import ExperimentList

        experiments = ExperimentList()

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                sel = refs["id"] == i
                sub_ref = refs.select(sel)
                n_sub_ref = len(sub_ref)
                if (params.output.min_reflections_per_experiment is not None
                        and n_sub_ref <
                        params.output.min_reflections_per_experiment):
                    skipped_expts += 1
                    continue

                nrefs_per_exp.append(n_sub_ref)
                sub_ref["id"] = flex.int(len(sub_ref), global_id)
                if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                    del sub_ref["shoebox"]
                reflections.extend(sub_ref)
                try:
                    experiments.append(combine(exp))
                except ComparisonError as e:
                    # When we failed tolerance checks, give a useful error message
                    (path,
                     index) = find_experiment_in(exp, params.input.experiments)
                    raise Sorry(
                        "Model didn't match reference within required tolerance for experiment {} in {}:"
                        "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                        .format(index, path, str(e)))

                global_id += 1

        if (params.output.min_reflections_per_experiment is not None
                and skipped_expts > 0):
            print("Removed {0} experiments with fewer than {1} reflections".
                  format(skipped_expts,
                         params.output.min_reflections_per_experiment))

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table

        header = ["Experiment", "Number of reflections"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print(st.format())

        # save a random subset if requested
        if (params.output.n_subset is not None
                and len(experiments) > params.output.n_subset):
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()
            if params.output.n_subset_method == "random":
                n_picked = 0
                indices = list(range(len(experiments)))
                while n_picked < params.output.n_subset:
                    idx = indices.pop(random.randint(0, len(indices) - 1))
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), n_picked)
                    subset_refls.extend(refls)
                    n_picked += 1
                print(
                    "Selecting a random subset of {0} experiments out of {1} total."
                    .format(params.output.n_subset, len(experiments)))
            elif params.output.n_subset_method == "n_refl":
                if params.output.n_refl_panel_list is None:
                    refls_subset = reflections
                else:
                    sel = flex.bool(len(reflections), False)
                    for p in params.output.n_refl_panel_list:
                        sel |= reflections["panel"] == p
                    refls_subset = reflections.select(sel)
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(refls_subset.select(
                            refls_subset["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)
                print(
                    "Selecting a subset of {0} experiments with highest number of reflections out of {1} total."
                    .format(params.output.n_subset, len(experiments)))

            elif params.output.n_subset_method == "significance_filter":
                from dials.algorithms.integration.stills_significance_filter import (
                    SignificanceFilter, )

                params.output.significance_filter.enable = True
                sig_filter = SignificanceFilter(params.output)
                refls_subset = sig_filter(experiments, reflections)
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(refls_subset.select(
                            refls_subset["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)

            experiments = subset_exp
            reflections = subset_refls

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            from dxtbx.command_line.image_average import splitit

            for i, indices in enumerate(
                    splitit(list(range(len(experiments))),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections["id"] == sub_idx)
                    sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.refl" % i
                self._save_output(batch_expts, batch_refls, exp_filename,
                                  ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            result = []
            for cluster, experiment in enumerate(experiments_l):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i, expts in enumerate(experiment):
                    refls = reflections_l[cluster][i]
                    refls["id"] = flex.int(len(refls), i)
                    cluster_expts.append(expts)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.expt" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.refl" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(
                experiments,
                reflections,
                dendrogram=params.clustering.dendrogram,
                threshold=params.clustering.threshold,
                n_max=params.clustering.max_crystals,
            )
            n_clusters = len(clustered.clustered_frames)

            def not_too_many(keeps):
                if params.clustering.max_clusters is not None:
                    return len(keeps) < params.clustering.max_clusters
                return True

            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments,
                clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                n_clusters,
            )
            for saveable_tuple in list_of_combined:
                if params.output.max_batch_size is None:
                    self._save_output(*saveable_tuple)
                else:
                    save_in_batches(*saveable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                self._save_output(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                )
            else:
                save_in_batches(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                    batch_size=params.output.max_batch_size,
                )
        return
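
The detector-averaging helper above relies on a geometric identity: rotating
the averaged fast and slow axes by +/-(a - 45) degrees about their cross
product, where 2a is the angle between them, makes the pair exactly
perpendicular again. A self-contained check with numpy (the slightly skewed
input axes are hypothetical):

import numpy as np

def rotate(v, axis, deg):
    """Rodrigues rotation of v about a unit axis by deg degrees."""
    axis = axis / np.linalg.norm(axis)
    t = np.radians(deg)
    return (v * np.cos(t) + np.cross(axis, v) * np.sin(t)
            + axis * np.dot(axis, v) * (1.0 - np.cos(t)))

fast = np.array([1.0, 0.02, 0.0])  # hypothetical averaged fast axis
slow = np.array([0.03, 1.0, 0.0])  # hypothetical averaged slow axis
c = np.cross(fast, slow)
cos_2a = np.dot(fast, slow) / (np.linalg.norm(fast) * np.linalg.norm(slow))
a = np.degrees(np.arccos(cos_2a)) / 2.0
fast = rotate(fast, c, a - 45.0)
slow = rotate(slow, c, -(a - 45.0))
assert abs(np.dot(fast, slow)) < 1e-9  # now perpendicular to rounding error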
Example #29
  def unit_cell_analysis(self,
                         cluster_threshold,
                         output_dir,
                         write_files=True):
    """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin, et al, Acta D, 2015) to split integration results according to
        detected morphological groupings (if any). Most useful with preliminary
        integration without target unit cell specified. """

    # Will not run clustering if only one integration result found
    if len(self.final_objects) == 1:
      unit_cell = (self.final_objects[0].final['a'],
                   self.final_objects[0].final['b'],
                   self.final_objects[0].final['c'],
                   self.final_objects[0].final['alpha'],
                   self.final_objects[0].final['beta'],
                   self.final_objects[0].final['gamma'])
      point_group = self.final_objects[0].final['sg']
      misc.main_log(self.logfile,
                    "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
      uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                      unit_cell[0], unit_cell[1], unit_cell[2],
                      unit_cell[3], unit_cell[4], unit_cell[5])
      misc.main_log(self.logfile, uc_line, True)

      self.cons_pg = point_group
      self.cons_uc = unit_cell

    else:
      uc_table = []
      uc_summary = []
      counter = 1

      # run hierarchical clustering analysis
      ucs = Cluster.from_files(self.pickles, use_b=True)
      clusters, _ = ucs.ab_cluster(cluster_threshold, log=False,
                                   write_file_lists=False, schnell=False,
                                   doplot=False)
      uc_table.append("\n\n{:-^80}\n"\
                      "".format(' UNIT CELL ANALYSIS '))

      # extract clustering info and add to summary output list
      for cluster in clusters:
        sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                  key=lambda x: -1 * x[1])
        pg_nums = [pg[1] for pg in sorted_pg_comp]
        cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

        output_file = os.path.join(output_dir, "uc_cluster_{}.lst".format(counter))

        # write out lists of output pickles that comprise clusters with > 1 members
        if len(cluster.members) > 1:
          counter += 1

          # Sort clustered images by mosaicity, lowest to highest
          cluster_filenames = [j.path for j in cluster.members]
          clustered_objects = [i for i in self.final_objects if \
                               i.final['final'] in cluster_filenames]
          sorted_cluster = sorted(clustered_objects,
                                  key=lambda i: i.final['mos'])
          # Write to file
          if write_files:
            for obj in sorted_cluster:
              with open(output_file, 'a') as scf:
                scf.write('{}\n'.format(obj.final['final']))

            mark_output = os.path.basename(output_file)
          else:
            mark_output = '*'
            output_file = None
        else:
          mark_output = ''
          output_file = None

        # format and record output
        uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                  "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                  "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                  "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                                        cluster.medians[0], cluster.stdevs[0],
                                        cluster.medians[1], cluster.stdevs[1],
                                        cluster.medians[2], cluster.stdevs[2],
                                        cluster.medians[3], cluster.stdevs[3],
                                        cluster.medians[4], cluster.stdevs[4],
                                        cluster.medians[5], cluster.stdevs[5],
                                        mark_output)
        uc_table.append(uc_line)
        uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                   output_file, uc_line]
        uc_summary.append(uc_info)

      uc_table.append('\nMost common unit cell:\n')

      # select the most prevalent unit cell (most members in cluster)
      uc_freqs = [i[0] for i in uc_summary]
      uc_pick = uc_summary[np.argmax(uc_freqs)]
      uc_table.append(uc_pick[4])

      self.cons_pg = uc_pick[1]
      self.cons_uc = uc_pick[2]

      if uc_pick[3] is not None:
        self.prime_data_path = uc_pick[3]

      for item in uc_table:
        misc.main_log(self.logfile, item, True)
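
Choosing the consensus cell at the end of the analysis boils down to an
argmax over cluster populations. A toy version of the selection (the
uc_summary rows below are hypothetical):

import numpy as np

# Hypothetical uc_summary rows: [n_members, point group, medians, file, line]
uc_summary = [
    [12, 'P222', (78.1, 78.3, 37.2, 90.0, 90.0, 90.0), None, '(12) P222 ...'],
    [45, 'P422', (78.0, 78.0, 37.1, 90.0, 90.0, 90.0), None, '(45) P422 ...'],
]
uc_freqs = [row[0] for row in uc_summary]
uc_pick = uc_summary[np.argmax(uc_freqs)]
assert uc_pick[1] == 'P422'  # the cluster with the most members wins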
Example #30
    def unit_cell_analysis(self):
        """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin, et al, Acta D, 2015) to split integration results according to
        detected morphological groupings (if any). Most useful with preliminary
        integration without target unit cell specified. """

        # Will not run clustering if only one integration result found or if turned off
        if not self.info.categories['integrated']:
            util.main_log(self.info.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            util.main_log(self.info.logfile,
                          '\n UNIT CELL CANNOT BE DETERMINED!', True)

        elif len(self.info.categories['integrated']) == 1:
            unit_cell = self.info.cluster_iterable[0][:6]
            point_group = self.info.cluster_iterable[0][6]
            util.main_log(self.info.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                      "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                                  unit_cell[0], unit_cell[1],
                                                  unit_cell[2],
                                                  unit_cell[3], unit_cell[4],
                                                  unit_cell[5])
            util.main_log(self.info.logfile, uc_line, True)

            self.info.best_pg = str(point_group)
            self.info.best_uc = unit_cell

        else:
            uc_table = []
            uc_summary = []

            if self.params.analysis.clustering.flag_on:
                # run hierarchical clustering analysis
                from xfel.clustering.cluster import Cluster

                counter = 0
                self.info.clusters = []

                threshold = self.params.analysis.clustering.threshold
                cluster_limit = self.params.analysis.clustering.limit
                final_pickles = self.info.categories['integrated'][0]

                pickles = []
                if self.params.analysis.clustering.n_images > 0:
                    import random

                    for _ in range(self.params.analysis.clustering.n_images):
                        random_number = random.randrange(0, len(final_pickles))
                        while final_pickles[random_number] in pickles:
                            random_number = random.randrange(
                                0, len(final_pickles))
                        pickles.append(final_pickles[random_number])
                else:
                    pickles = final_pickles

                # Cluster from files (slow, but will keep for now)
                ucs = Cluster.from_files(pickle_list=pickles)

                # Do clustering
                clusters, _ = ucs.ab_cluster(threshold=threshold,
                                             log=False,
                                             write_file_lists=False,
                                             schnell=False,
                                             doplot=False)
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL ANALYSIS '))

                # extract clustering info and add to summary output list
                if cluster_limit is None:
                    if len(pickles) // 10 >= 10:
                        cluster_limit = 10
                    else:
                        cluster_limit = len(pickles) // 10

                for cluster in clusters:
                    sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                            key=lambda x: -1 * x[1])
                    pg_nums = [pg[1] for pg in sorted_pg_comp]
                    cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                    if len(cluster.members) > cluster_limit:
                        counter += 1

                        # Write to file
                        cluster_filenames = [j.path for j in cluster.members]
                        if self.params.analysis.clustering.write_files:
                            output_file = os.path.join(
                                self.info.int_base,
                                "uc_cluster_{}.lst".format(counter))
                            for fn in cluster_filenames:
                                with open(output_file, 'a') as scf:
                                    scf.write('{}\n'.format(fn))

                            mark_output = os.path.basename(output_file)
                        else:
                            mark_output = '*'
                            output_file = None

                    else:
                        mark_output = ''
                        output_file = None

                    # Populate clustering info for GUI display
                    uc_init = uctbx.unit_cell(cluster.medians)
                    symmetry = crystal.symmetry(unit_cell=uc_init,
                                                space_group_symbol='P1')
                    groups = metric_subgroups(input_symmetry=symmetry,
                                              max_delta=3)
                    top_group = groups.result_groups[0]
                    best_sg = str(groups.lattice_group_info()).split('(')[0]
                    best_uc = top_group['best_subsym'].unit_cell().parameters()
                    # best_sg = str(top_group['best_subsym'].space_group_info())

                    uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "".format(best_uc[0], best_uc[1], best_uc[2],
                                            best_uc[3], best_uc[4], best_uc[5])
                    cluster_info = {
                        'number': len(cluster.members),
                        'pg': best_sg,
                        'uc': uc_no_stdev,
                        'filename': mark_output
                    }
                    self.info.clusters.append(cluster_info)

                    # format and record output
                    # TODO: How to propagate stdevs after conversion from Niggli?
                    # uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                    #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                    #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                    #           "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                    #                                 cluster.medians[0], cluster.stdevs[0],
                    #                                 cluster.medians[1], cluster.stdevs[1],
                    #                                 cluster.medians[2], cluster.stdevs[2],
                    #                                 cluster.medians[3], cluster.stdevs[3],
                    #                                 cluster.medians[4], cluster.stdevs[4],
                    #                                 cluster.medians[5], cluster.stdevs[5],
                    #                                 mark_output)
                    # uc_table.append(uc_line)
                    uc_table.append("{:<6}:  {} {}".format(
                        len(cluster.members), uc_no_stdev, mark_output))
                    lattices = ', '.join(
                        ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                    # uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                    #            output_file, uc_line, lattices]
                    uc_info = [
                        len(cluster.members), best_sg, best_uc, output_file,
                        uc_no_stdev, lattices
                    ]
                    uc_summary.append(uc_info)

            else:
                # generate average unit cell
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL AVERAGING (no clustering) '))
                uc_a, uc_b, uc_c, uc_alpha, \
                uc_beta, uc_gamma, uc_sg = list(zip(*self.info.cluster_iterable))
                cons_pg = Counter(uc_sg).most_common(1)[0][0]
                all_pgs = Counter(uc_sg).most_common()
                unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                             np.median(uc_alpha), np.median(uc_beta),
                             np.median(uc_gamma))

                # Populate clustering info for GUI display
                uc_init = uctbx.unit_cell(unit_cell)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = metric_subgroups(input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_sg = str(groups.lattice_group_info()).split('(')[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                # best_sg = str(top_group['best_subsym'].space_group_info())

                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {
                    'number': len(self.info.cluster_iterable),
                    'pg': best_sg,
                    'uc': uc_no_stdev,
                    'filename': None
                }
                self.info.clusters.append(cluster_info)

                # uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   " \
                #           "{}".format('({})'.format(len(self.final_objects)), cons_pg,
                #                       np.median(uc_a), np.std(uc_a),
                #                       np.median(uc_b), np.std(uc_b),
                #                       np.median(uc_c), np.std(uc_c),
                #                       np.median(uc_alpha), np.std(uc_alpha),
                #                       np.median(uc_beta), np.std(uc_beta),
                #                       np.median(uc_gamma), np.std(uc_gamma), '')
                #
                # uc_table.append(uc_line)
                uc_table.append(uc_no_stdev)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
                # uc_info = [len(self.final_objects), cons_pg, unit_cell, None,
                #            uc_line, lattices]
                uc_info = [
                    len(self.info.cluster_iterable), best_sg, best_uc, None,
                    uc_no_stdev, lattices
                ]
                uc_summary.append(uc_info)

            uc_table.append('\nMost common unit cell:\n')

            # select the most prevalent unit cell (most members in cluster)
            uc_freqs = [i[0] for i in uc_summary]
            uc_pick = uc_summary[np.argmax(uc_freqs)]
            uc_table.append(uc_pick[4])
            uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                            ''.format(uc_pick[5]))
            self.info.best_pg = str(uc_pick[1])
            self.info.best_uc = uc_pick[2]

            if uc_pick[3] is not None:
                self.prime_data_path = uc_pick[3]

            for item in uc_table:
                util.main_log(self.info.logfile, item, False)
            self.info.update(uc_table=uc_table)

            if self.gui_mode:
                return self.info.clusters
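
The lattice-symmetry lookup used for the GUI summary can be reproduced on its
own. A sketch assuming a cctbx installation; the tetragonal-looking P1 cell
is hypothetical:

from cctbx import crystal, uctbx
from cctbx.sgtbx.lattice_symmetry import metric_subgroups

# Hypothetical median cell from a cluster, treated initially as P1
uc_init = uctbx.unit_cell((78.0, 78.0, 37.0, 90.0, 90.0, 90.0))
symmetry = crystal.symmetry(unit_cell=uc_init, space_group_symbol='P1')
groups = metric_subgroups(input_symmetry=symmetry, max_delta=3)
top_group = groups.result_groups[0]
best_sg = str(groups.lattice_group_info()).split('(')[0]
best_uc = top_group['best_subsym'].unit_cell().parameters()
print(best_sg, best_uc)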
Example #31
def run(args):
    import libtbx
    from libtbx import easy_pickle
    from dials.util import log
    from dials.util.options import OptionParser

    parser = OptionParser(
        #usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        #epilog=help_message
    )

    params, options, args = parser.parse_args(show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity,
               info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil:
        logger.info('The following parameters have been modified:\n%s',
                    diff_phil)

    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    if params.save_plot and not params.animate:
        import matplotlib
        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        matplotlib.use('Agg')  # use a non-interactive backend

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        if len(experiments) > len(reflections):
            flattened_reflections = []
            for refl in reflections:
                for i in range(0, flex.max(refl['id']) + 1):
                    sel = refl['id'] == i
                    flattened_reflections.append(refl.select(sel))
            reflections = flattened_reflections

        assert len(experiments) == len(reflections)

        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())
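            # NB: the profile-fitted branch below is disabled ("if 0"), so
            # summation intensities are always used.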
            if 0 and 'intensity.prf.value' in refl:
                sel = refl.get_flags(refl.flags.integrated_prf)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.prf.value']
                variances = refl['intensity.prf.variance']
            else:
                assert 'intensity.sum.value' in refl
                sel = refl.get_flags(refl.flags.integrated_sum)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.sum.value']
                variances = refl['intensity.sum.variance']
            # FIXME probably need to do some filtering of intensities similar to that
            # done in export_mtz
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry,
                                    miller_indices,
                                    anomalous_flag=False)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source='DIALS', source_type='pickle'))
            datasets_input.append(intensities)

    files = args

    for file_name in files:

        try:
            data = easy_pickle.load(file_name)
            intensities = data['observations'][0]
            intensities.set_info(
                miller.array_info(source=file_name, source_type='pickle'))
            intensities = intensities.customized_copy(
                anomalous_flag=False).set_info(intensities.info())
            batches = None
        except Exception:
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'

            as_miller_arrays = reader.as_miller_arrays(merge_equivalents=False)
            intensities = [
                ma for ma in as_miller_arrays
                if ma.info().labels == ['I', 'SIGI']
            ][0]
            batches = [
                ma for ma in as_miller_arrays if ma.info().labels == ['BATCH']
            ]
            if len(batches):
                batches = batches[0]
            else:
                batches = None
            mtz_object = reader.file_content()
            intensities = intensities.customized_copy(
                anomalous_flag=False,
                indices=mtz_object.extract_original_index_miller_indices(
                )).set_info(intensities.info())

        intensities.set_observation_type_xray_intensity()
        datasets_input.append(intensities)

    if len(datasets_input) == 0:
        raise Sorry('No valid reflection files provided on command line')

    datasets = []
    for intensities in datasets_input:

        if params.batch is not None:
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        if params.min_i_mean_over_sigma_mean is not None and (
                params.d_min is libtbx.Auto or params.d_min is not None):
            from xia2.Modules import Resolutionizer
            rparams = Resolutionizer.phil_defaults.extract().resolutionizer
            rparams.nbins = 20
            resolutionizer = Resolutionizer.resolutionizer(
                intensities, None, rparams)
            i_mean_over_sigma_mean = 4
            d_min = resolutionizer.resolution_i_mean_over_sigma_mean(
                i_mean_over_sigma_mean)
            if params.d_min is libtbx.Auto:
                intensities = intensities.resolution_filter(
                    d_min=d_min).set_info(intensities.info())
                if params.verbose:
                    logger.info('Selecting reflections with d > %.2f' % d_min)
            elif d_min > params.d_min:
                logger.info('Rejecting dataset %s as d_min too low (%.2f)' %
                            (file_name, d_min))
                continue
            else:
                logger.info('Estimated d_min for %s: %.2f' %
                            (file_name, d_min))
        elif params.d_min not in (None, libtbx.Auto):
            intensities = intensities.resolution_filter(
                d_min=params.d_min).set_info(intensities.info())

        if params.normalisation == 'kernel':
            from mmtbx.scaling import absolute_scaling
            normalisation = absolute_scaling.kernel_normalisation(
                intensities, auto_kernel=True)
            intensities = normalisation.normalised_miller.deep_copy()

        cb_op_to_primitive = intensities.change_of_basis_op_to_primitive_setting(
        )
        intensities = intensities.change_basis(cb_op_to_primitive)
        if params.mode == 'full' or params.space_group is not None:
            if params.space_group is not None:
                space_group_info = params.space_group.primitive_setting()
                if not space_group_info.group().is_compatible_unit_cell(
                        intensities.unit_cell()):
                    logger.info(
                        'Skipping data set - incompatible space group and unit cell: %s, %s'
                        % (space_group_info, intensities.unit_cell()))
                    continue
            else:
                space_group_info = sgtbx.space_group_info('P1')
            intensities = intensities.customized_copy(
                space_group_info=space_group_info)

        datasets.append(intensities)

    crystal_symmetries = [d.crystal_symmetry().niggli_cell() for d in datasets]
    lattice_ids = list(range(len(datasets)))
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    if params.save_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure("Andrews-Bernstein distance dendogram",
                         figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(params.unit_cell_clustering.threshold,
                                 log=params.unit_cell_clustering.log,
                                 write_file_lists=False,
                                 schnell=False,
                                 doplot=params.save_plot,
                                 ax=ax)
    if params.save_plot:
        plt.tight_layout()
        plt.savefig('%scluster_unit_cell.png' % params.plot_prefix)
        plt.close(fig)
    logger.info(unit_cell_info(clusters))
    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if largest_cluster_lattice_ids is None:
            largest_cluster_lattice_ids = cluster_lattice_ids
        elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
            largest_cluster_lattice_ids = cluster_lattice_ids

    dataset_selection = largest_cluster_lattice_ids
    if len(dataset_selection) < len(datasets):
        logger.info('Selecting subset of data for cosym analysis: %s' %
                    str(dataset_selection))
        datasets = [datasets[i] for i in dataset_selection]

    # per-dataset change of basis operator to ensure all consistent
    change_of_basis_ops = []
    for i, dataset in enumerate(datasets):
        metric_subgroups = sgtbx.lattice_symmetry.metric_subgroups(dataset,
                                                                   max_delta=5)
        subgroup = metric_subgroups.result_groups[0]
        cb_op_inp_best = subgroup['cb_op_inp_best']
        datasets[i] = dataset.change_basis(cb_op_inp_best)
        change_of_basis_ops.append(cb_op_inp_best)

    cb_op_ref_min = datasets[0].change_of_basis_op_to_niggli_cell()
    for i, dataset in enumerate(datasets):
        if params.space_group is None:
            datasets[i] = dataset.change_basis(cb_op_ref_min).customized_copy(
                space_group_info=sgtbx.space_group_info('P1'))
        else:
            datasets[i] = dataset.change_basis(cb_op_ref_min)
            datasets[i] = datasets[i].customized_copy(
                crystal_symmetry=crystal.symmetry(
                    unit_cell=datasets[i].unit_cell(),
                    space_group_info=params.space_group.primitive_setting(),
                    assert_is_compatible_unit_cell=False))
        datasets[i] = datasets[i].merge_equivalents().array()
        change_of_basis_ops[i] = cb_op_ref_min * change_of_basis_ops[i]

    result = analyse_datasets(datasets, params)

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in result.reindexing_ops:
        if 0 in result.reindexing_ops[dataset_id]:
            cb_op = result.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, [])
            reindexing_ops[cb_op].append(dataset_id)
        if dataset_id in result.space_groups:
            space_groups.setdefault(result.space_groups[dataset_id], [])
            space_groups[result.space_groups[dataset_id]].append(dataset_id)

    logger.info('Space groups:')
    for sg, datasets in space_groups.items():
        logger.info(str(sg.info().reference_setting()))
        logger.info(datasets)

    logger.info('Reindexing operators:')
    for cb_op, datasets in reindexing_ops.items():
        logger.info(cb_op)
        logger.info(datasets)

    if (len(experiments) and len(reflections)
            and params.output.reflections is not None
            and params.output.experiments is not None):
        import copy
        from dxtbx.model import ExperimentList
        from dxtbx.serialize import dump
        reindexed_experiments = ExperimentList()
        reindexed_reflections = flex.reflection_table()
        expt_id = 0
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                expt = experiments[dataset_selection[dataset_id]]
                refl = reflections[dataset_selection[dataset_id]]
                reindexed_expt = copy.deepcopy(expt)
                refl_reindexed = copy.deepcopy(refl)
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                reindexed_expt.crystal = reindexed_expt.crystal.change_basis(
                    cb_op_this)
                refl_reindexed['miller_index'] = cb_op_this.apply(
                    refl_reindexed['miller_index'])
                reindexed_experiments.append(reindexed_expt)
                refl_reindexed['id'] = flex.int(refl_reindexed.size(), expt_id)
                reindexed_reflections.extend(refl_reindexed)
                expt_id += 1

        logger.info('Saving reindexed experiments to %s' %
                    params.output.experiments)
        dump.experiment_list(reindexed_experiments, params.output.experiments)
        logger.info('Saving reindexed reflections to %s' %
                    params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                file_name = files[dataset_selection[dataset_id]]
                basename = os.path.basename(file_name)
                out_name = os.path.splitext(
                    basename)[0] + params.output.suffix + '_' + str(
                        dataset_selection[dataset_id]) + ".mtz"
                reader = any_reflection_file(file_name)
                assert reader.file_type() == 'ccp4_mtz'
                mtz_object = reader.file_content()
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                if not cb_op_this.is_identity_op():
                    logger.info('reindexing %s (%s)' %
                                (file_name, cb_op_this.as_xyz()))
                    mtz_object.change_basis_in_place(cb_op_this)
                mtz_object.write(out_name)
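
Grouping datasets by their consensus reindexing operator, as done after
analyse_datasets above, is an inverted-index build over the per-dataset
results. A stand-alone miniature (the operator strings and dataset ids are
hypothetical):

# Hypothetical result.reindexing_ops: dataset_id -> {dataset_id: cb_op}
result_reindexing_ops = {
    0: {0: 'x,y,z'},
    1: {0: 'x,y,z'},
    2: {0: '-x,-y,z'},
}

reindexing_ops = {}
for dataset_id, ops in result_reindexing_ops.items():
    if 0 in ops:
        reindexing_ops.setdefault(ops[0], []).append(dataset_id)

assert reindexing_ops == {'x,y,z': [0, 1], '-x,-y,z': [2]}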