Python Cluster.from_files Examples

Programming Language: Python

Namespace/Package Name: xfel.clustering.cluster

Class/Type: Cluster

Method/Function: from_files

Examples at hotexamples.com: 5

Python Cluster.from_files - 5 examples found. These are the top-rated, real-world Python examples of xfel.clustering.cluster.Cluster.from_files, extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

from_crystal_symmetries(6)

from_directories(6)

from_files(4)

from_iterable(4)

Cluster(2)

__init__(1)

from_expts(1)

from_list(1)

Example #1

Show file

def run(_args):
    """Cluster unit cells from the given locations and print a summary.

    A ``Cluster`` is built from ``_args.dirs`` -- with ``Cluster.from_files``
    when ``_args.paths`` is truthy, otherwise with
    ``Cluster.from_directories`` -- then ``ab_cluster`` is run with threshold
    ``_args.t`` and ``unit_cell_info`` of the resulting clusters is printed.

    :param _args: parsed options object; the attributes read here are
        ``dirs``, ``paths``, ``n``, ``dials``, ``t``, ``log``, ``nofiles``,
        ``schnell`` and ``noplot``.
    :raises IOError: if no input locations were supplied in ``_args.dirs``.
    """
    # The guard used to compare the options object itself with an integer
    # (``_args < 2``). That comparison never triggers on Python 2 and is a
    # TypeError on Python 3, so test the locations that are actually used.
    if not _args.dirs:
        raise IOError("Must provide location(s) of pickles")

    if _args.paths:
        ucs = Cluster.from_files(raw_input=_args.dirs,
                                 n_images=_args.n,
                                 dials=_args.dials)
    else:
        ucs = Cluster.from_directories(_args.dirs,
                                       n_images=_args.n,
                                       dials=_args.dials)

    # Options shared by both ab_cluster calls below.
    # NOTE(review): ``nofiles`` / ``noplot`` are forwarded unchanged as
    # ``write_file_lists`` / ``doplot``; presumably they are inverted
    # (store_false) flags -- confirm against the argument parser.
    cluster_options = dict(log=_args.log,
                           write_file_lists=_args.nofiles,
                           schnell=_args.schnell,
                           doplot=_args.noplot)

    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t, **cluster_options)
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(unit_cell_info(clusters))
    else:
        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, _ = ucs.ab_cluster(_args.t, ax=ax, **cluster_options)
        print(unit_cell_info(clusters))
        plt.tight_layout()
        plt.show()

Example #2

Show file

    def unit_cell_analysis(self):
        """Determine the most common unit cell of the integration results.

        Calls the unit cell analysis module, which uses hierarchical
        clustering (Zeldin, et al, Acta D, 2015) to split integration results
        according to detected morphological groupings (if any). Most useful
        with preliminary integration without target unit cell specified.

        Behaviour depends on ``self.info.categories['integrated']``:

        * falsy -- a "cannot be determined" notice is logged;
        * length 1 -- the first entry of ``self.info.cluster_iterable`` is
          logged and stored as ``self.info.best_uc`` / ``self.info.best_pg``;
        * otherwise -- either clustering (when
          ``self.params.analysis.clustering.flag_on``) or plain median
          averaging is performed, the most populated entry is stored as
          ``self.info.best_uc`` / ``self.info.best_pg`` and the summary table
          is logged and stored through ``self.info.update``.

        When clustering with ``write_files`` enabled, one
        ``uc_cluster_<n>.lst`` file per sufficiently large cluster is written
        to ``self.info.int_base`` and the file of the largest cluster becomes
        ``self.prime_data_path``.

        :return: ``self.info.clusters`` if the multi-result branch ran and
            ``self.gui_mode`` is truthy; otherwise ``None``.
        """

        def _best_lattice(cell_parameters):
            """Return (lattice label, best-cell parameters, formatted cell)."""
            uc_init = uctbx.unit_cell(cell_parameters)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol='P1')
            groups = metric_subgroups(input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_sg = str(groups.lattice_group_info()).split('(')[0]
            best_uc = top_group['best_subsym'].unit_cell().parameters()
            uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "".format(best_uc[0], best_uc[1], best_uc[2],
                                    best_uc[3], best_uc[4], best_uc[5])
            return best_sg, best_uc, uc_no_stdev

        # Will not run clustering if only one integration result found or if turned off
        if not self.info.categories['integrated']:
            util.main_log(self.info.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            util.main_log(self.info.logfile,
                          '\n UNIT CELL CANNOT BE DETERMINED!', True)

        elif len(self.info.categories['integrated']) == 1:
            # Each entry is unpacked below as (a, b, c, alpha, beta, gamma,
            # point group), so the cell needs the first SIX values; the old
            # ``[:5]`` slice made ``unit_cell[5]`` raise IndexError.
            first_result = self.info.cluster_iterable[0]
            unit_cell = first_result[:6]
            point_group = first_result[6]
            util.main_log(self.info.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                      "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                                  unit_cell[0], unit_cell[1],
                                                  unit_cell[2],
                                                  unit_cell[3], unit_cell[4],
                                                  unit_cell[5])
            util.main_log(self.info.logfile, uc_line, True)

            self.info.best_pg = str(point_group)
            self.info.best_uc = unit_cell

        else:
            uc_table = []
            uc_summary = []

            if self.params.analysis.clustering.flag_on:
                # run hierarchical clustering analysis
                from xfel.clustering.cluster import Cluster

                counter = 0
                self.info.clusters = []

                threshold = self.params.analysis.clustering.threshold
                cluster_limit = self.params.analysis.clustering.limit
                final_pickles = self.info.categories['integrated'][0]

                n_images = self.params.analysis.clustering.n_images
                if n_images > 0:
                    import random

                    # Draw a random subset without replacement. The previous
                    # hand-rolled loop called len() on the integer n_images,
                    # only appended a pick when it was a duplicate, and could
                    # spin forever when n_images exceeded the population;
                    # capping the sample size avoids the latter.
                    pickles = random.sample(
                        final_pickles, min(n_images, len(final_pickles)))
                else:
                    pickles = final_pickles

                # Cluster from files (slow, but will keep for now)
                ucs = Cluster.from_files(pickle_list=pickles)

                # Do clustering
                clusters, _ = ucs.ab_cluster(threshold=threshold,
                                             log=False,
                                             write_file_lists=False,
                                             schnell=False,
                                             doplot=False)
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL ANALYSIS '))

                # Default size cut-off: a tenth of the input, capped at 10.
                if cluster_limit is None:
                    cluster_limit = min(10, len(pickles) / 10)

                # extract clustering info and add to summary output list
                for cluster in clusters:
                    sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                            key=lambda x: -1 * x[1])

                    if len(cluster.members) > cluster_limit:
                        counter += 1

                        # Write to file
                        cluster_filenames = [j.path for j in cluster.members]
                        if self.params.analysis.clustering.write_files:
                            output_file = os.path.join(
                                self.info.int_base,
                                "uc_cluster_{}.lst".format(counter))
                            # Append mode is kept from the original, but the
                            # file is now opened once rather than per line.
                            with open(output_file, 'a') as scf:
                                scf.writelines('{}\n'.format(fn)
                                               for fn in cluster_filenames)

                            mark_output = os.path.basename(output_file)
                        else:
                            mark_output = '*'
                            output_file = None

                    else:
                        mark_output = ''
                        output_file = None

                    # Populate clustering info for GUI display
                    best_sg, best_uc, uc_no_stdev = _best_lattice(
                        cluster.medians)
                    cluster_info = {
                        'number': len(cluster.members),
                        'pg': best_sg,
                        'uc': uc_no_stdev,
                        'filename': mark_output
                    }
                    self.info.clusters.append(cluster_info)

                    # format and record output
                    # TODO: How to propagate stdevs after conversion from Niggli?
                    uc_table.append("{:<6}:  {} {}".format(
                        len(cluster.members), uc_no_stdev, mark_output))
                    lattices = ', '.join(
                        ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                    uc_info = [
                        len(cluster.members), best_sg, best_uc, output_file,
                        uc_no_stdev, lattices
                    ]
                    uc_summary.append(uc_info)

            else:
                # generate average unit cell
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL AVERAGING (no clustering) '))
                uc_a, uc_b, uc_c, uc_alpha, \
                uc_beta, uc_gamma, uc_sg = list(zip(*self.info.cluster_iterable))
                all_pgs = Counter(uc_sg).most_common()
                unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                             np.median(uc_alpha), np.median(uc_beta),
                             np.median(uc_gamma))

                # Populate clustering info for GUI display
                best_sg, best_uc, uc_no_stdev = _best_lattice(unit_cell)
                cluster_info = {
                    'number': len(self.info.cluster_iterable),
                    'pg': best_sg,
                    'uc': uc_no_stdev,
                    'filename': None
                }
                # NOTE(review): self.info.clusters is only (re)initialised in
                # the clustering branch above; this append presumably relies
                # on it already existing -- confirm where it is created.
                self.info.clusters.append(cluster_info)

                uc_table.append(uc_no_stdev)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
                uc_info = [
                    len(self.info.cluster_iterable), best_sg, best_uc, None,
                    uc_no_stdev, lattices
                ]
                uc_summary.append(uc_info)

            uc_table.append('\nMost common unit cell:\n')

            # select the most prevalent unit cell (most members in cluster)
            uc_freqs = [i[0] for i in uc_summary]
            uc_pick = uc_summary[np.argmax(uc_freqs)]
            uc_table.append(uc_pick[4])
            uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                            ''.format(uc_pick[5]))
            self.info.best_pg = str(uc_pick[1])
            self.info.best_uc = uc_pick[2]

            # Only clusters that were written to disk carry a list file.
            if uc_pick[3] is not None:
                self.prime_data_path = uc_pick[3]

            for item in uc_table:
                util.main_log(self.info.logfile, item, False)
            self.info.update(uc_table=uc_table)

            if self.gui_mode:
                return self.info.clusters

Example #3

Show file

File: iota_analysis.py Project: cctbx-xfel/cctbx_project

    def unit_cell_analysis(self, write_files=True):
        """Determine the consensus unit cell of the integration results.

        Calls the unit cell analysis module, which uses hierarchical
        clustering (Zeldin, et al, Acta D, 2015) to split integration results
        according to detected morphological groupings (if any). Most useful
        with preliminary integration without target unit cell specified.

        Behaviour depends on ``self.final_objects``:

        * ``None`` or empty -- ``self.cons_uc`` / ``self.cons_pg`` are set to
          ``None`` and a "cannot be determined" notice is logged;
        * one object -- its cell and space group are logged and stored;
        * otherwise -- clustering (when ``self.params.analysis.run_clustering``)
          or median averaging is performed and the most populated entry is
          stored as ``self.cons_uc`` / ``self.cons_pg``.

        :param write_files: when true, each sufficiently large cluster is
            written to ``uc_cluster_<n>.lst`` in ``self.output_dir`` (sorted
            by ``final['mos']``) and the file of the largest cluster becomes
            ``self.prime_data_path``.
        :return: ``(self.cons_pg, self.cons_uc, self.clusters)`` if the
            multi-result branch ran and ``self.gui_mode`` is truthy;
            otherwise ``None``.
        """

        # Will not run clustering if only one integration result found or if turned off
        # An empty list is treated like None: with no results the summary
        # code below would fail on an empty argmax / Counter.
        if not self.final_objects:
            self.cons_uc = None
            self.cons_pg = None
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!',
                          True)

        elif len(self.final_objects) == 1:
            only_result = self.final_objects[0].final
            unit_cell = (only_result['a'],
                         only_result['b'],
                         only_result['c'],
                         only_result['alpha'],
                         only_result['beta'],
                         only_result['gamma'])
            point_group = only_result['sg']
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                      "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                            unit_cell[0], unit_cell[1], unit_cell[2],
                            unit_cell[3], unit_cell[4], unit_cell[5])
            misc.main_log(self.logfile, uc_line, True)

            self.cons_pg = point_group
            self.cons_uc = unit_cell

        else:
            uc_table = []
            uc_summary = []

            if self.params.analysis.run_clustering:
                # run hierarchical clustering analysis
                from xfel.clustering.cluster import Cluster

                counter = 0
                ucs = Cluster.from_files(pickle_list=self.pickles, use_b=True)
                clusters, _ = ucs.ab_cluster(
                    self.params.analysis.cluster_threshold,
                    log=False,
                    write_file_lists=False,
                    schnell=False,
                    doplot=False)
                uc_table.append("\n\n{:-^80}\n"\
                                "".format(' UNIT CELL ANALYSIS '))

                # Size cut-off: a tenth of the input, capped at 10.
                cluster_limit = min(10, len(self.pickles) / 10)

                # extract clustering info and add to summary output list
                for cluster in clusters:
                    sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                            key=lambda x: -1 * x[1])
                    pg_nums = [pg[1] for pg in sorted_pg_comp]
                    cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                    if len(cluster.members) > cluster_limit:
                        counter += 1

                        # Sort clustered images by mosaicity, lowest to highest
                        # (set lookup; assumes member paths are hashable
                        # filename strings -- TODO confirm)
                        cluster_filenames = set(
                            j.path for j in cluster.members)
                        clustered_objects = [
                            i for i in self.final_objects
                            if i.final['final'] in cluster_filenames]
                        sorted_cluster = sorted(clustered_objects,
                                                key=lambda i: i.final['mos'])
                        # Write to file
                        if write_files:
                            output_file = os.path.join(
                                self.output_dir,
                                "uc_cluster_{}.lst".format(counter))
                            # Append mode is kept from the original, but the
                            # file is now opened once rather than per line.
                            with open(output_file, 'a') as scf:
                                scf.writelines(
                                    '{}\n'.format(obj.final['final'])
                                    for obj in sorted_cluster)

                            mark_output = os.path.basename(output_file)
                        else:
                            mark_output = '*'
                            output_file = None

                        # Populate clustering info for GUI display
                        uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                      "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                      "".format(cluster.medians[0], cluster.medians[1],
                                                cluster.medians[2], cluster.medians[3],
                                                cluster.medians[4], cluster.medians[5])
                        cluster_info = {
                            'number': len(cluster.members),
                            'pg': cons_pg[0],
                            'uc': uc_no_stdev,
                            'filename': mark_output
                        }
                        self.clusters.append(cluster_info)

                    else:
                        mark_output = ''
                        output_file = None

                    # format and record output
                    uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                              "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                              "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                              "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                                                    cluster.medians[0], cluster.stdevs[0],
                                                    cluster.medians[1], cluster.stdevs[1],
                                                    cluster.medians[2], cluster.stdevs[2],
                                                    cluster.medians[3], cluster.stdevs[3],
                                                    cluster.medians[4], cluster.stdevs[4],
                                                    cluster.medians[5], cluster.stdevs[5],
                                                    mark_output)
                    uc_table.append(uc_line)
                    lattices = ', '.join(
                        ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                    uc_info = [
                        len(cluster.members), cons_pg[0], cluster.medians,
                        output_file, uc_line, lattices
                    ]
                    uc_summary.append(uc_info)

            else:

                # generate average unit cell
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL AVERAGING (no clustering) '))
                uc_a = [i.final['a'] for i in self.final_objects]
                uc_b = [i.final['b'] for i in self.final_objects]
                uc_c = [i.final['c'] for i in self.final_objects]
                uc_alpha = [i.final['alpha'] for i in self.final_objects]
                uc_beta = [i.final['beta'] for i in self.final_objects]
                uc_gamma = [i.final['gamma'] for i in self.final_objects]
                uc_sg = [i.final['sg'] for i in self.final_objects]
                cons_pg = Counter(uc_sg).most_common(1)[0][0]
                all_pgs = Counter(uc_sg).most_common()
                uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   " \
                          "{}".format('({})'.format(len(self.final_objects)), cons_pg,
                                      np.median(uc_a), np.std(uc_a),
                                      np.median(uc_b), np.std(uc_b),
                                      np.median(uc_c), np.std(uc_c),
                                      np.median(uc_alpha), np.std(uc_alpha),
                                      np.median(uc_beta), np.std(uc_beta),
                                      np.median(uc_gamma), np.std(uc_gamma), '')
                unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                             np.median(uc_alpha), np.median(uc_beta),
                             np.median(uc_gamma))
                uc_table.append(uc_line)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
                uc_info = [
                    len(self.final_objects), cons_pg, unit_cell, None, uc_line,
                    lattices
                ]
                uc_summary.append(uc_info)

            uc_table.append('\nMost common unit cell:\n')

            # select the most prevalent unit cell (most members in cluster)
            uc_freqs = [i[0] for i in uc_summary]
            uc_pick = uc_summary[np.argmax(uc_freqs)]
            uc_table.append(uc_pick[4])
            uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                            ''.format(uc_pick[5]))

            self.cons_pg = uc_pick[1]
            self.cons_uc = uc_pick[2]

            # Only clusters that were written to disk carry a list file.
            if uc_pick[3] is not None:
                self.prime_data_path = uc_pick[3]

            for item in uc_table:
                misc.main_log(self.logfile, item, (not self.gui_mode))

            if self.gui_mode:
                return self.cons_pg, self.cons_uc, self.clusters

Example #4

Show file

File: iota_analysis.py Project: keitaroyam/cctbx_fork

  def unit_cell_analysis(self,
                         cluster_threshold,
                         output_dir,
                         write_files=True):
    """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin, et al, Acta D, 2015) to split integration results according to
        detected morphological groupings (if any). Most useful with preliminary
        integration without target unit cell specified.

        The consensus result is stored in ``self.cons_uc`` / ``self.cons_pg``
        and the summary is logged through ``misc.main_log``.

        :param cluster_threshold: threshold handed to ``ab_cluster``.
        :param output_dir: directory receiving ``uc_cluster_<n>.lst`` files.
        :param write_files: when true, every cluster with more than one member
            is written to a list file (sorted by ``final['mos']``) and the
            file of the largest cluster becomes ``self.prime_data_path``.
        :return: ``None``.
    """

    # With no integration results there is nothing to cluster: len(None)
    # would raise and an empty summary would break the argmax below.
    if not self.final_objects:
      self.cons_uc = None
      self.cons_pg = None
      misc.main_log(self.logfile,
                    "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
      misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!', True)

    # Will not run clustering if only one integration result found
    elif len(self.final_objects) == 1:
      only_result = self.final_objects[0].final
      unit_cell = (only_result['a'],
                   only_result['b'],
                   only_result['c'],
                   only_result['alpha'],
                   only_result['beta'],
                   only_result['gamma'])
      point_group = only_result['sg']
      misc.main_log(self.logfile,
                    "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
      uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                      unit_cell[0], unit_cell[1], unit_cell[2],
                      unit_cell[3], unit_cell[4], unit_cell[5])
      misc.main_log(self.logfile, uc_line, True)

      self.cons_pg = point_group
      self.cons_uc = unit_cell

    else:
      uc_table = []
      uc_summary = []
      counter = 1

      # run hierarchical clustering analysis
      ucs = Cluster.from_files(self.pickles, use_b=True)
      clusters, _ = ucs.ab_cluster(cluster_threshold, log=False,
                                   write_file_lists=False, schnell=False,
                                   doplot=False)
      uc_table.append("\n\n{:-^80}\n"\
                      "".format(' UNIT CELL ANALYSIS '))

      # extract clustering info and add to summary output list
      for cluster in clusters:
        sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                key=lambda x: -1 * x[1])
        pg_nums = [pg[1] for pg in sorted_pg_comp]
        cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

        # The name uses the counter value BEFORE it is incremented below.
        output_file = os.path.join(output_dir,
                                   "uc_cluster_{}.lst".format(counter))

        # write out lists of output pickles that comprise clusters with > 1 members
        if len(cluster.members) > 1:
          counter += 1

          # Sort clustered images by mosaicity, lowest to highest
          # (set lookup; assumes member paths are hashable filename
          # strings -- TODO confirm)
          cluster_filenames = set(j.path for j in cluster.members)
          clustered_objects = [i for i in self.final_objects
                               if i.final['final'] in cluster_filenames]
          sorted_cluster = sorted(clustered_objects,
                                  key=lambda i: i.final['mos'])
          # Write to file
          if write_files:
            # Append mode is kept from the original, but the file is now
            # opened once rather than once per line.
            with open(output_file, 'a') as scf:
              scf.writelines('{}\n'.format(obj.final['final'])
                             for obj in sorted_cluster)

            mark_output = os.path.basename(output_file)
          else:
            mark_output = '*'
            output_file = None
        else:
          mark_output = ''
          output_file = None

        # format and record output
        uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                  "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                  "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                  "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                                        cluster.medians[0], cluster.stdevs[0],
                                        cluster.medians[1], cluster.stdevs[1],
                                        cluster.medians[2], cluster.stdevs[2],
                                        cluster.medians[3], cluster.stdevs[3],
                                        cluster.medians[4], cluster.stdevs[4],
                                        cluster.medians[5], cluster.stdevs[5],
                                        mark_output)
        uc_table.append(uc_line)
        uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                   output_file, uc_line]
        uc_summary.append(uc_info)

      uc_table.append('\nMost common unit cell:\n')

      # select the most prevalent unit cell (most members in cluster)
      uc_freqs = [i[0] for i in uc_summary]
      uc_pick = uc_summary[np.argmax(uc_freqs)]
      uc_table.append(uc_pick[4])

      self.cons_pg = uc_pick[1]
      self.cons_uc = uc_pick[2]

      # Only clusters that were written to disk carry a list file.
      if uc_pick[3] is not None:
        self.prime_data_path = uc_pick[3]

      for item in uc_table:
        misc.main_log(self.logfile, item, True)

Example #5

Show file

    def unit_cell_analysis(self):
        """ Calls unit cell analysis module, which uses hierarchical clustering
        (Zeldin, et al, Acta D, 2015) to split integration results according to
        detected morphological groupings (if any). Most useful with preliminary
        integration without target unit cell specified. """

        # Will not run clustering if only one integration result found or if turned off
        if self.final_objects is None:
            self.cons_uc = None
            self.cons_pg = None
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            misc.main_log(self.logfile, '\n UNIT CELL CANNOT BE DETERMINED!',
                          True)

        elif len(self.final_objects) == 1:
            unit_cell = (self.final_objects[0].final['a'],
                         self.final_objects[0].final['b'],
                         self.final_objects[0].final['c'],
                         self.final_objects[0].final['alpha'],
                         self.final_objects[0].final['beta'],
                         self.final_objects[0].final['gamma'])
            point_group = self.final_objects[0].final['sg']
            misc.main_log(self.logfile,
                          "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
            uc_line = "{:<6} {:^4}:  {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                      "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                            unit_cell[0], unit_cell[1], unit_cell[2],
                            unit_cell[3], unit_cell[4], unit_cell[5])
            misc.main_log(self.logfile, uc_line, True)

            self.cons_pg = point_group
            self.cons_uc = unit_cell

        else:
            uc_table = []
            uc_summary = []

            if self.params.analysis.run_clustering:
                # run hierarchical clustering analysis
                from xfel.clustering.cluster import Cluster
                counter = 0

                threshold = self.params.analysis.cluster_threshold
                cluster_limit = self.params.analysis.cluster_limit
                if self.params.analysis.cluster_n_images > 0:
                    n_images = self.params.analysis.cluster_n_images
                else:
                    n_images = len(self.final_objects)

                obj_list = []
                if n_images < len(self.final_objects):
                    import random
                    for i in range(n_images):
                        random_number = random.randrange(
                            0, len(self.final_objects))
                        if self.final_objects[random_number] in obj_list:
                            while self.final_objects[
                                    random_number] in obj_list:
                                random_number = random.randrange(
                                    0, len(self.final_objects))
                            obj_list.append(self.final_objects[random_number])
                        else:
                            obj_list.append(self.final_objects[random_number])
                if obj_list == []:
                    obj_list = self.final_objects

                # Cluster from iterable (this doesn't keep filenames - bad!)
                # with Capturing() as suppressed_output:
                #   uc_iterable = []
                #   for obj in obj_list:
                #     unit_cell = (float(obj.final['a']),
                #                  float(obj.final['b']),
                #                  float(obj.final['c']),
                #                  float(obj.final['alpha']),
                #                  float(obj.final['beta']),
                #                  float(obj.final['gamma']),
                #                  obj.final['sg'])
                #     uc_iterable.append(unit_cell)
                #   ucs = Cluster.from_iterable(iterable=uc_iterable)

                # Cluster from files (slow, but will keep for now)
                ucs = Cluster.from_files(pickle_list=self.pickles)

                # Do clustering
                clusters, _ = ucs.ab_cluster(threshold=threshold,
                                             log=False,
                                             write_file_lists=False,
                                             schnell=False,
                                             doplot=False)
                uc_table.append("\n\n{:-^80}\n"\
                                "".format(' UNIT CELL ANALYSIS '))

                # extract clustering info and add to summary output list
                if cluster_limit is None:
                    if len(self.pickles) / 10 >= 10:
                        cluster_limit = 10
                    else:
                        cluster_limit = len(self.pickles) / 10

                for cluster in clusters:
                    sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                            key=lambda x: -1 * x[1])
                    pg_nums = [pg[1] for pg in sorted_pg_comp]
                    cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

                    if len(cluster.members) > cluster_limit:
                        counter += 1

                        # Sort clustered images by mosaicity, lowest to highest
                        cluster_filenames = [j.path for j in cluster.members]
                        clustered_objects = [i for i in self.final_objects if \
                                             i.final['final'] in cluster_filenames]
                        sorted_cluster = sorted(clustered_objects,
                                                key=lambda i: i.final['mos'])
                        # Write to file
                        if self.params.analysis.cluster_write_files:
                            output_file = os.path.join(
                                self.output_dir,
                                "uc_cluster_{}.lst".format(counter))
                            for obj in sorted_cluster:
                                with open(output_file, 'a') as scf:
                                    scf.write('{}\n'.format(
                                        obj.final['final']))

                            mark_output = os.path.basename(output_file)
                        else:
                            mark_output = '*'
                            output_file = None

                    else:
                        mark_output = ''
                        output_file = None

                    # Populate clustering info for GUI display
                    uc_init = uctbx.unit_cell(cluster.medians)
                    symmetry = crystal.symmetry(unit_cell=uc_init,
                                                space_group_symbol='P1')
                    groups = sgtbx.lattice_symmetry.\
                      metric_subgroups(input_symmetry=symmetry, max_delta=3)
                    top_group = groups.result_groups[0]
                    best_uc = top_group['best_subsym'].unit_cell().parameters()
                    best_sg = top_group['best_subsym'].space_group_info()

                    uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "".format(best_uc[0], best_uc[1], best_uc[2],
                                            best_uc[3], best_uc[4], best_uc[5])
                    cluster_info = {
                        'number': len(cluster.members),
                        'pg': best_sg,
                        'uc': uc_no_stdev,
                        'filename': mark_output
                    }
                    self.clusters.append(cluster_info)

                    # format and record output
                    # TODO: How to propagate stdevs after conversion from Niggli?
                    # uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                    #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                    #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   "\
                    #           "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                    #                                 cluster.medians[0], cluster.stdevs[0],
                    #                                 cluster.medians[1], cluster.stdevs[1],
                    #                                 cluster.medians[2], cluster.stdevs[2],
                    #                                 cluster.medians[3], cluster.stdevs[3],
                    #                                 cluster.medians[4], cluster.stdevs[4],
                    #                                 cluster.medians[5], cluster.stdevs[5],
                    #                                 mark_output)
                    # uc_table.append(uc_line)
                    uc_table.append("{:<6}:  {} {}".format(
                        len(cluster.members), uc_no_stdev, mark_output))
                    lattices = ', '.join(
                        ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp])
                    # uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                    #            output_file, uc_line, lattices]
                    uc_info = [
                        len(cluster.members), best_sg, best_uc, output_file,
                        uc_no_stdev, lattices
                    ]
                    uc_summary.append(uc_info)

            else:

                # generate average unit cell
                uc_table.append("\n\n{:-^80}\n" \
                                "".format(' UNIT CELL AVERAGING (no clustering) '))
                uc_a = [i.final['a'] for i in self.final_objects]
                uc_b = [i.final['b'] for i in self.final_objects]
                uc_c = [i.final['c'] for i in self.final_objects]
                uc_alpha = [i.final['alpha'] for i in self.final_objects]
                uc_beta = [i.final['beta'] for i in self.final_objects]
                uc_gamma = [i.final['gamma'] for i in self.final_objects]
                uc_sg = [i.final['sg'] for i in self.final_objects]
                cons_pg = Counter(uc_sg).most_common(1)[0][0]
                all_pgs = Counter(uc_sg).most_common()
                unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                             np.median(uc_alpha), np.median(uc_beta),
                             np.median(uc_gamma))

                # Populate clustering info for GUI display
                uc_init = uctbx.unit_cell(unit_cell)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = sgtbx.lattice_symmetry. \
                  metric_subgroups(input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                best_sg = top_group['best_subsym'].space_group_info()

                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {
                    'number': len(self.final_objects),
                    'pg': best_sg,
                    'uc': uc_no_stdev,
                    'filename': None
                }
                self.clusters.append(cluster_info)

                # uc_line = "{:<6} {:^4}:  {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                #           "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f})   " \
                #           "{}".format('({})'.format(len(self.final_objects)), cons_pg,
                #                       np.median(uc_a), np.std(uc_a),
                #                       np.median(uc_b), np.std(uc_b),
                #                       np.median(uc_c), np.std(uc_c),
                #                       np.median(uc_alpha), np.std(uc_alpha),
                #                       np.median(uc_beta), np.std(uc_beta),
                #                       np.median(uc_gamma), np.std(uc_gamma), '')
                #
                # uc_table.append(uc_line)
                uc_table.append(uc_no_stdev)
                lattices = ', '.join(
                    ['{} ({})'.format(i[0], i[1]) for i in all_pgs])
                # uc_info = [len(self.final_objects), cons_pg, unit_cell, None,
                #            uc_line, lattices]
                uc_info = [
                    len(self.final_objects), best_sg, best_uc, None,
                    uc_no_stdev, lattices
                ]
                uc_summary.append(uc_info)

            uc_table.append('\nMost common unit cell:\n')

            # select the most prevalent unit cell (most members in cluster)
            uc_freqs = [i[0] for i in uc_summary]
            uc_pick = uc_summary[np.argmax(uc_freqs)]
            uc_table.append(uc_pick[4])
            uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                            ''.format(uc_pick[5]))
            self.cons_pg = uc_pick[1]
            self.cons_uc = uc_pick[2]

            if uc_pick[3] != None:
                self.prime_data_path = uc_pick[3]

            for item in uc_table:
                misc.main_log(self.logfile, item, (not self.gui_mode))

            self.analysis_result.__setattr__('clusters', self.clusters)
            self.analysis_result.__setattr__('cons_pg', self.cons_pg)
            self.analysis_result.__setattr__('cons_uc', self.cons_uc)

            if self.gui_mode:
                return self.clusters