Example #1
    def _goniometer(self):
        """Initialize the structure for the goniometer."""

        values = [
            float(e)
            for e in self._header_dictionary["CRYSTAL_GONIO_VALUES"].split()
        ]
        names = [
            e.strip()
            for e in self._header_dictionary["CRYSTAL_GONIO_NAMES"].split()
        ]
        units = [
            e.strip()
            for e in self._header_dictionary["CRYSTAL_GONIO_UNITS"].split()
        ]
        axis_elts = [
            float(e)
            for e in self._header_dictionary["CRYSTAL_GONIO_VECTORS"].split()
        ]
        axes = [
            matrix.col(axis_elts[3 * j:3 * (j + 1)]) for j in range(len(units))
        ]
        scan_axis = self._header_dictionary["ROTATION_AXIS_NAME"].strip()

        # Take only elements that have corresponding units of 'deg' (which is
        # probably all of them).
        filt = [e == "deg" for e in units]
        values = [e for e, f in zip(values, filt) if f]
        names = [e for e, f in zip(names, filt) if f]
        axes = [e for e, f in zip(axes, filt) if f]

        # Multi-axis gonio requires axes in order as viewed from crystal to gonio
        # base. Assume the SMV header records them in reverse order.

        axes = flex.vec3_double(reversed(axes))
        names = flex.std_string(reversed(names))
        values = flex.double(reversed(values))
        scan_axis = flex.first_index(names, scan_axis)

        gonio = self._goniometer_factory.make_multi_axis_goniometer(
            axes, values, names, scan_axis)

        # The calculated rotation axis is also recorded in the header. We could
        # use this to check that the goniometer is as expected
        rot_axis = tuple(
            map(float, self._header_dictionary["ROTATION_VECTOR"].split()))
        for e1, e2 in zip(rot_axis, gonio.get_rotation_axis()):
            assert abs(e1 - e2) < 1e-6

        return gonio
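The scan-axis lookup above is the core flex.first_index idiom: it returns the position of the first element equal to the probe value, and None when nothing matches (Example #3 below relies on that None). A minimal sketch, with made-up axis names:

    from scitbx.array_family import flex

    names = flex.std_string(("PHI", "KAPPA", "OMEGA"))
    assert flex.first_index(names, "OMEGA") == 2
    # no match: first_index returns None, so guard lookups that may fail
    assert flex.first_index(names, "CHI") is None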
Example #2
    def _goniometer(self):
        """Construct a goniometer from the records in the mini CBF header."""

        if (
            "Alpha" in self._cif_header_dictionary
            and "Kappa" in self._cif_header_dictionary
        ):
            # Kappa
            alpha = float(self._cif_header_dictionary["Alpha"].split()[0])
            omega = float(self._cif_header_dictionary["Chi"].split()[0])
            kappa = float(self._cif_header_dictionary["Kappa"].split()[0])
            phi = float(self._cif_header_dictionary["Phi"].split()[0])

            axis = self._cif_header_dictionary["Oscillation_axis"]

            scanaxis = {"OMEGA": "Omega", "PHI": "Phi"}

            assert axis in scanaxis

            # this is the direction the arm points in at datum
            direction = "+z"

            return self._goniometer_factory.make_kappa_goniometer(
                alpha, omega, kappa, phi, direction, scanaxis[axis]
            )

        else:
            # Smargon
            phi = float(self._cif_header_dictionary["Phi"].split()[0])
            chi = float(self._cif_header_dictionary["Chi"].split()[0])
            omega = float(self._cif_header_dictionary["Omega"].split()[0])

            names = flex.std_string(("PHI", "CHI", "OMEGA"))
            axes = flex.vec3_double(((1, 0, 0), (0, 0, -1), (1, 0, 0)))
            angles = flex.double((phi, chi, omega))

            axis = self._cif_header_dictionary["Oscillation_axis"].upper()
            assert axis in names, axis
            scan_axis = flex.first_index(names, axis)

            return self._goniometer_factory.make_multi_axis_goniometer(
                axes, angles, names, scan_axis
            )
Example #3
    def _goniometer(self):
        # goniometer angles in ANGLES are 2-theta, omega, phi, chi (FIXED)
        # AXIS indexes into this list to define the scan axis (in FORTRAN counting)
        # START and RANGE define the start and step size for each image

        _, omega, phi, chi = map(float, self.header_dict["ANGLES"].split())
        scan_axis = ["NONE", "2THETA", "OMEGA", "PHI", "CHI", "X", "Y", "Z"]
        scan_axis = scan_axis[int(self.header_dict["AXIS"])]
        names = flex.std_string(("PHI", "CHI", "OMEGA"))
        scan_axis = flex.first_index(names, scan_axis)
        if scan_axis is None:
            # default to the omega axis; the factory expects an integer index
            scan_axis = flex.first_index(names, "OMEGA")

        # https://journals.iucr.org/d/issues/2014/10/00/dz5309/dz5309sup1.pdf
        axes = flex.vec3_double(((0, -1, 0), (0, 0, 1), (0, 1, 0)))
        omega -= 180
        angles = flex.double((phi, chi, omega))

        return self._goniometer_factory.make_multi_axis_goniometer(
            axes, angles, names, scan_axis)
Example #4
    def _goniometer(self):
        '''Construct a goniometer from the records in the mini CBF header.'''

        if ('Alpha' in self._cif_header_dictionary
                and 'Kappa' in self._cif_header_dictionary):
            # Kappa
            alpha = float(self._cif_header_dictionary['Alpha'].split()[0])
            omega = float(self._cif_header_dictionary['Chi'].split()[0])
            kappa = float(self._cif_header_dictionary['Kappa'].split()[0])
            phi = float(self._cif_header_dictionary['Phi'].split()[0])

            axis = self._cif_header_dictionary['Oscillation_axis']

            scanaxis = {'OMEGA': 'Omega', 'PHI': 'Phi'}

            assert axis in scanaxis

            # this is the direction the arm points in at datum
            direction = '+z'

            return self._goniometer_factory.make_kappa_goniometer(
                alpha, omega, kappa, phi, direction, scanaxis[axis])

        else:
            # Smargon
            from scitbx.array_family import flex

            phi = float(self._cif_header_dictionary['Phi'].split()[0])
            chi = float(self._cif_header_dictionary['Chi'].split()[0])
            omega = float(self._cif_header_dictionary['Omega'].split()[0])

            names = flex.std_string(("PHI", "CHI", "OMEGA"))
            axes = flex.vec3_double(((1, 0, 0), (0, 0, -1), (1, 0, 0)))
            angles = flex.double((phi, chi, omega))

            axis = self._cif_header_dictionary['Oscillation_axis'].upper()
            assert axis in names, axis
            scan_axis = flex.first_index(names, axis)

            return self._goniometer_factory.make_multi_axis_goniometer(
                axes, angles, names, scan_axis)
Example #5
def find_delta(rho_map, tol):
  """ Find delta as hinted on fig. 1 of ref. [1] in module charge_flipping """
  rho = rho_map.real_map_unpadded().as_1d()
  max_rho = flex.max(rho)
  rho /= max_rho
  sorting = flex.sort_permutation(rho)
  sorted_rho = rho.select(sorting)
  n = len(sorted_rho)
  p,q = n//4, 3*n//4
  indexes = flex.double_range(p,q)
  values = sorted_rho[p:q]
  c = flex.linear_correlation(indexes, values)
  assert c.is_well_defined() and c.coefficient() > 0.99
  r = flex.linear_regression(indexes, values)
  a,b = r.y_intercept(), r.slope()
  deviation = flex.abs(a + b*flex.double_range(n) - sorted_rho)
  non_linear_sel = deviation > tol
  low = flex.first_index(non_linear_sel, False)
  high = flex.last_index(non_linear_sel, False)
  assert non_linear_sel[low:high].count(False)/(high-low+1) > 0.99
  assert sorted_rho[low] < 0 and sorted_rho[high] > 0
  return min(sorted_rho[high], -sorted_rho[low]), max_rho
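find_delta applies the same lookup to a boolean selection, pairing flex.first_index with flex.last_index to bracket the linear region. A small illustrative sketch of that idiom:

    from scitbx.array_family import flex

    non_linear = flex.bool((True, False, False, False, True))
    low = flex.first_index(non_linear, False)   # first linear sample -> 1
    high = flex.last_index(non_linear, False)   # last linear sample  -> 3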
Example #6
    def _silhouette_analysis(self, cluster_labels, linkage_matrix, n_clusters,
                             min_silhouette_score):
        """Compare valid equal-sized clustering using silhouette scores.

        Args:
          cluster_labels (scitbx.array_family.flex.int):
          linkage_matrix (numpy.ndarray): The hierarchical clustering of centroids of the
            initial clustering as produced by
            :func:`scipy.cluster.hierarchy.linkage`.
          n_clusters (int): Optionally override the automatic determination of the
            number of clusters.
          min_silhouette_score (float): The minimum silhouette score to be used
            in automatic determination of the number of clusters.

        Returns:
          cluster_labels (scitbx.array_family.flex.int): A label for each coordinate.
        """
        eps = 1e-6
        X = self.coords.as_numpy_array()

        cluster_labels_input = cluster_labels
        distances = linkage_matrix[::, 2]
        distances = np.insert(distances, 0, 0)
        silhouette_scores = flex.double()
        thresholds = flex.double()
        threshold_n_clusters = flex.size_t()
        for threshold in distances[1:]:
            cluster_labels = cluster_labels_input.deep_copy()
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold - eps,
                                        criterion="distance").tolist()
            counts = [labels.count(l) for l in set(labels)]
            if len(set(counts)) > 1:
                # only equal-sized clusters are valid
                continue

            n = len(set(labels))
            if n == 1:
                continue
            elif n_clusters is not Auto and n != n_clusters:
                continue
            for i in range(len(labels)):
                cluster_labels.set_selected(cluster_labels_input == i,
                                            int(labels[i] - 1))
            if len(set(cluster_labels)) == X.shape[0]:
                # silhouette coefficient not defined if 1 dataset per cluster
                # not sure what the default value should be
                sample_silhouette_values = np.full(cluster_labels.size(), 0)
            else:
                # Compute the silhouette scores for each sample
                sample_silhouette_values = metrics.silhouette_samples(
                    X, cluster_labels.as_numpy_array(), metric="cosine")
            silhouette_avg = sample_silhouette_values.mean()
            silhouette_scores.append(silhouette_avg)
            thresholds.append(threshold)
            threshold_n_clusters.append(n)

            count_negative = (sample_silhouette_values < 0).sum()
            logger.info("Clustering:")
            logger.info("  Number of clusters: %i" % n)
            logger.info("  Threshold score: %.3f (%.1f deg)" %
                        (threshold, math.degrees(math.acos(1 - threshold))))
            logger.info("  Silhouette score: %.3f" % silhouette_avg)
            logger.info("  -ve silhouette scores: %.1f%%" %
                        (100 * count_negative / sample_silhouette_values.size))

        if n_clusters is Auto:
            idx = flex.max_index(silhouette_scores)
        else:
            idx = flex.first_index(threshold_n_clusters, n_clusters)
            if idx is None:
                raise Sorry("No valid clustering with %i clusters" %
                            n_clusters)

        if n_clusters is Auto and silhouette_scores[idx] < min_silhouette_score:
            # assume single cluster
            cluster_labels = flex.int(cluster_labels.size(), 0)
        else:
            threshold = thresholds[idx] - eps
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold,
                                        criterion="distance")
            cluster_labels = flex.double(cluster_labels.size(), -1)
            for i in range(len(labels)):
                cluster_labels.set_selected(cluster_labels_input == i,
                                            float(labels[i] - 1))

        return cluster_labels, threshold
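Note that the n_clusters is Auto branch above uses flex.max_index rather than first_index; for an array without ties the two spell the same argmax. A quick sketch of the equivalence:

    from scitbx.array_family import flex

    scores = flex.double((0.2, 0.8, 0.5))
    assert flex.max_index(scores) == 1
    # equivalent lookup: position of the maximum value
    assert flex.first_index(scores, flex.max(scores)) == 1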
Example #7
    def imgCIF_H(cbf_handle):
        """Initialize a goniometer model from an imgCIF file handle, where
        it is assumed that the file has already been read."""

        # find the goniometer axes and dependencies
        axis_names = flex.std_string()
        depends_on = flex.std_string()
        axes = flex.vec3_double()
        angles = flex.double()
        scan_axis = None
        cbf_handle.find_category(b"axis")
        for i in range(cbf_handle.count_rows()):
            cbf_handle.find_column(b"equipment")
            if cbf_handle.get_value() == b"goniometer":
                cbf_handle.find_column(b"id")
                axis_names.append(cbf_handle.get_value())
                axis = []
                for i in range(3):
                    cbf_handle.find_column(b"vector[%i]" % (i + 1))
                    axis.append(float(cbf_handle.get_value()))
                axes.append(axis)
                cbf_handle.find_column(b"depends_on")
                depends_on.append(cbf_handle.get_value())
            cbf_handle.next_row()

        # find the starting angles of each goniometer axis and figure out which one
        # is the scan axis (i.e. non-zero angle_increment)
        cbf_handle.find_category(b"diffrn_scan_axis")
        for i in range(cbf_handle.count_rows()):
            cbf_handle.find_column(b"axis_id")
            axis_name = cbf_handle.get_value()
            if axis_name.decode() not in axis_names:
                cbf_handle.next_row()
                continue
            cbf_handle.find_column(b"angle_start")
            axis_angle = float(cbf_handle.get_value())
            cbf_handle.find_column(b"angle_increment")
            increment = float(cbf_handle.get_value())
            angles.append(axis_angle)
            if abs(increment) > 0:
                assert (
                    scan_axis is None
                ), "More than one scan axis is defined: not currently supported"
                scan_axis = flex.first_index(axis_names, axis_name)
            cbf_handle.next_row()
        assert axes.size() == angles.size()
        if scan_axis is None:
            # probably a still shot -> scan axis arbitrary as no scan
            scan_axis = 0

        # figure out the order of the axes from the depends_on values
        order = flex.size_t()
        for i in range(axes.size()):
            if depends_on[i] == ".":
                o = 0
            else:
                o = flex.first_index(axis_names, depends_on[i]) + 1
            assert o not in order
            order.append(o)

        # multi-axis gonio requires axes in order as viewed from crystal to gonio base
        # i.e. the reverse of the order we have from cbf header
        order = order.reversed()
        axes = axes.select(order)
        angles = angles.select(order)
        axis_names = axis_names.select(order)
        scan_axis = axes.size() - scan_axis - 1

        # construct a multi-axis goniometer
        gonio = GoniometerFactory.multi_axis(axes, angles, axis_names,
                                             scan_axis)
        return gonio
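The index arithmetic on the last lines is easy to misread: after reversing an axis list of size n, an index i into the original (header) order maps to n - i - 1 in the reversed order. A tiny check, with hypothetical axis names:

    from scitbx.array_family import flex

    axis_names = flex.std_string(("GON_PHI", "GON_KAPPA", "GON_OMEGA"))
    scan_axis = flex.first_index(axis_names, "GON_PHI")  # 0 in header order
    rev = flex.std_string(reversed(axis_names))
    assert flex.first_index(rev, "GON_PHI") == axis_names.size() - scan_axis - 1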
Example #8
 def get_closest_idx(data, val):
     deltas = flex.abs(data - val)
     return flex.first_index(deltas, flex.min(deltas))
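A brief usage note for this helper: it does a nearest-value lookup by pairing flex.min with first_index. The exact float comparison inside first_index is safe here because the minimum is drawn from deltas itself. For example:

    # assumes: from scitbx.array_family import flex (as in Example #11)
    data = flex.double((1.0, 2.5, 4.0))
    assert get_closest_idx(data, 2.3) == 1  # 2.5 is the closest entry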
Example #9
  def run(self):
    '''Execute the script.'''
    import os, math
    from cctbx.crystal import symmetry
    from scitbx.array_family import flex
    from libtbx import table_utils, easy_pickle
    from xfel.command_line.cspad_cbf_metrology import find_files
    from dxtbx.model.experiment.experiment_list import ExperimentListFactory
    table_header = ["","","","I","IsigI","N >","RMSD","Cutoff"]
    table_header2 = ["Bin","Resolution Range","Completeness","","","cutoff","(um)",""]

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(show_diff_phil=False, return_unhandled=True)
    exp_paths = []
    refl_paths = []
    for path in all_paths:
      exps, refs = find_files(path, "integrated")
      exp_paths.extend(exps)
      refl_paths.extend(refs)
    assert len(exp_paths) == len(refl_paths)

    best_data = {}
    best_limits = flex.double()
    for exp_path, refl_path in zip(exp_paths, refl_paths):
      experiments = ExperimentListFactory.from_json_file(exp_path)
      reflections = easy_pickle.load(refl_path)
      exp_name = os.path.basename(exp_path)
      if exp_name.startswith("idx-") and exp_name.endswith("_refined_experiments.json"):
        # slice, not strip: lstrip/rstrip remove character sets, not substrings
        tag = exp_name[len("idx-"):-len("_refined_experiments.json")]
      else:
        tag = "%s, %s"%(exp_path, refl_path)

      for exp_id, experiment in enumerate(experiments):
        print "*"*80
        print "Data table for", tag
        table_data = []
        table_data.append(table_header)
        table_data.append(table_header2)

        crystal = experiment.crystal
        refls = reflections.select(reflections['id'] == exp_id)
        sym = symmetry(unit_cell = crystal.get_unit_cell(), space_group = crystal.get_space_group())
        d = crystal.get_unit_cell().d(refls['miller_index'])
        mset = sym.miller_set(indices = refls['miller_index'].select(d>=params.d_min), anomalous_flag=False)
        binner = mset.setup_binner(n_bins=params.n_bins)
        acceptable_resolution_bins = []
        for i in binner.range_used():
          d_max, d_min = binner.bin_d_range(i)
          sel = (d <= d_max) & (d > d_min)
          sel &= refls['intensity.sum.value'] > 0
          bin_refls = refls.select(sel)
          n_refls = len(bin_refls)
          avg_i = flex.mean(bin_refls['intensity.sum.value']) if n_refls > 0 else 0
          avg_i_sigi = flex.mean(bin_refls['intensity.sum.value'] /
                                 flex.sqrt(bin_refls['intensity.sum.variance'])) if n_refls > 0 else 0
          acceptable_resolution_bins.append(avg_i_sigi >= params.sig_filter_sigma)

          bright_refls = bin_refls.select((bin_refls['intensity.sum.value']/flex.sqrt(bin_refls['intensity.sum.variance'])) >= params.sig_filter_sigma)
          n_bright = len(bright_refls)

          rmsd_obs = 1000*math.sqrt((bright_refls['xyzcal.mm']-bright_refls['xyzobs.mm.value']).sum_sq()/n_bright) if n_bright > 0 else 0

          table_row = []
          table_row.append("%3d"%i)
          table_row.append("%-13s"%binner.bin_legend(i_bin=i,show_bin_number=False,show_bin_range=False,
                                                     show_d_range=True, show_counts=False))
          table_row.append("%13s"%binner.bin_legend(i_bin=i,show_bin_number=False,show_bin_range=False,
                                                    show_d_range=False, show_counts=True))

          table_row.append("%.1f"%(avg_i))
          table_row.append("%.1f"%(avg_i_sigi))
          table_row.append("%3d"%n_bright)
          table_row.append("%.1f"%(rmsd_obs))
          table_data.append(table_row)

        acceptable_resolution_bins = [acceptable_resolution_bins[i] for i in xrange(len(acceptable_resolution_bins))
                                      if False not in acceptable_resolution_bins[:i+1]]

        for b, row in zip(acceptable_resolution_bins, table_data[2:]):
          if b:
            row.append("X")
        print table_utils.format(table_data,has_header=2,justify='center',delim=" ")

        if any(acceptable_resolution_bins):
          best_index = acceptable_resolution_bins.count(True)-1
          best_row = table_data[best_index+2]
          d_min = binner.bin_d_range(binner.range_used()[best_index])[1]
          if len(best_limits) < params.best_count:
            best_limits.append(d_min)
            best_data[tag] = d_min, best_row
          elif (d_min < best_limits).count(True) > 0:
            worst_d_min = flex.max(best_limits)
            for t, data in best_data.iteritems():
              if worst_d_min == data[0]:
                best_data[t] = d_min, best_row
                best_limits[flex.first_index(best_limits, worst_d_min)] = d_min
                break
          print tag, "best row:", " ".join(best_row)
        else:
          print "Data didn't pass cutoff"
    if len(best_limits) > 0:
      print "*"*80
      print "Top", len(best_limits)
      for tag, data in best_data.iteritems():
        print tag, " ".join(data[1])
Example #10
  def __call__(self):
    from iotbx.detectors.cspad_detector_formats import reverse_timestamp
    run_numbers = [r.run for r in self.trial.runs]
    assert self.run.run in run_numbers
    rungroup_ids = [rg.id for rg in self.trial.rungroups]
    assert self.rungroup.id in rungroup_ids
    isoforms = self.trial.isoforms
    assert len(isoforms) > 0
    low_res_bin_ids = []
    high_res_bin_ids = []
    for isoform in isoforms:
      bins = isoform.cell.bins
      d_mins = [float(b.d_min) for b in bins]
      low_res_bin_ids.append(str(bins[d_mins.index(max(d_mins))].id))
      if self.d_min is None:
        min_bin_index = d_mins.index(min(d_mins))
      else:
        d_maxes = [float(b.d_max) for b in bins]
        qualified_bin_indices = [i for i in xrange(len(bins)) if d_maxes[i] >= self.d_min and d_mins[i] <= self.d_min]
        assert len(qualified_bin_indices) == 1
        min_bin_index = qualified_bin_indices[0]
      high_res_bin_ids.append(str(bins[min_bin_index].id))
    assert len(low_res_bin_ids) > 0
    assert len(high_res_bin_ids) > 0
    assert len(low_res_bin_ids) == len(high_res_bin_ids)

    tag = self.app.params.experiment_tag

    # Get the high and low res avg_i_sigi in one query. Means there will be 2x timestamps retrieved, where each is found twice
    query = """SELECT bin.id, event.timestamp, event.n_strong, cb.avg_i_sigi, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               JOIN `%s_imageset` imgset ON imgset.id = is_e.imageset_id
               JOIN `%s_experiment` exp ON exp.imageset_id = imgset.id
               JOIN `%s_crystal` crystal ON crystal.id = exp.crystal_id
               JOIN `%s_cell` cell ON cell.id = crystal.cell_id
               JOIN `%s_bin` bin ON bin.cell_id = cell.id
               JOIN `%s_cell_bin` cb ON cb.bin_id = bin.id AND cb.crystal_id = crystal.id
               WHERE event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d AND
                     cb.bin_id IN (%s)
            """ % (tag, tag, tag, tag, tag, tag, tag, tag, self.trial.id, self.run.id, self.rungroup.id, ", ".join(low_res_bin_ids + high_res_bin_ids))
    cursor = self.app.execute_query(query)
    timestamps = flex.double()
    n_strong = flex.int()
    average_i_sigi_low = flex.double()
    average_i_sigi_high = flex.double()
    two_theta_low = flex.double()
    two_theta_high = flex.double()
    for row in cursor.fetchall():
      b_id, ts, n_s, avg_i_sigi, tt_low, tt_high = row
      rts = reverse_timestamp(ts)
      rts = rts[0] + (rts[1]/1000)
      if rts not in timestamps:
        # First time through, figure out which bin is reported (high or low), add avg_i_sigi to that set of results
        timestamps.append(rts)
        n_strong.append(n_s)
        two_theta_low.append(tt_low or -1)
        two_theta_high.append(tt_high or -1)
        if str(b_id) in low_res_bin_ids:
          average_i_sigi_low.append(avg_i_sigi or 1e-6)
          average_i_sigi_high.append(0)
        elif str(b_id) in high_res_bin_ids:
          average_i_sigi_low.append(0)
          average_i_sigi_high.append(avg_i_sigi or 0)
        else:
          assert False
      else:
        # Second time through, already have added to timestamps and n_strong, so fill in missing avg_i_sigi
        index = flex.first_index(timestamps, rts)
        if str(b_id) in low_res_bin_ids:
          average_i_sigi_low[index] = avg_i_sigi
        elif str(b_id) in high_res_bin_ids:
          average_i_sigi_high[index] = avg_i_sigi or 0
        else:
          assert False

    # This left join query finds the events with no imageset, meaning they failed to index
    query = """SELECT event.timestamp, event.n_strong, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               LEFT JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               WHERE is_e.event_id IS NULL AND
                     event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d
            """ % (tag, tag, self.trial.id, self.run.id, self.rungroup.id)

    cursor = self.app.execute_query(query)
    for row in cursor.fetchall():
      ts, n_s, tt_low, tt_high = row
      rts = reverse_timestamp(ts)
      timestamps.append(rts[0] + (rts[1]/1000))
      n_strong.append(n_s)
      average_i_sigi_low.append(0)
      average_i_sigi_high.append(0)
      two_theta_low.append(tt_low or -1)
      two_theta_high.append(tt_high or -1)

    order = flex.sort_permutation(timestamps)
    timestamps = timestamps.select(order)
    n_strong = n_strong.select(order)
    average_i_sigi_low = average_i_sigi_low.select(order)
    average_i_sigi_high = average_i_sigi_high.select(order)
    two_theta_low = two_theta_low.select(order)
    two_theta_high = two_theta_high.select(order)

    return timestamps, two_theta_low, two_theta_high, n_strong, average_i_sigi_low, average_i_sigi_high
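The first query's result loop illustrates a two-pass fill pattern: append on the first sighting of a timestamp, then use first_index to locate that row and fill in the missing column on the second. Stripped to its essentials, with invented values:

    from scitbx.array_family import flex

    timestamps = flex.double()
    i_sigi_low = flex.double()
    for ts, value in ((10.0, 1.5), (20.0, 2.5), (10.0, 9.9)):
        if ts not in timestamps:
            timestamps.append(ts)
            i_sigi_low.append(value)
        else:
            # second sighting: overwrite the placeholder in the existing row
            i_sigi_low[flex.first_index(timestamps, ts)] = value
    assert list(i_sigi_low) == [9.9, 2.5]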
Example #11
 def get_closest_idx(data, val):
     from scitbx.array_family import flex
     deltas = flex.abs(data - val)
     return flex.first_index(deltas, flex.min(deltas))
Example #12
    def run(self):
        """Execute the script."""
        import os, math
        from cctbx.crystal import symmetry
        from scitbx.array_family import flex
        from libtbx import table_utils, easy_pickle
        from xfel.command_line.cspad_cbf_metrology import find_files
        from dxtbx.model.experiment_list import ExperimentListFactory

        table_header = ["", "", "", "I", "IsigI", "N >", "RMSD", "Cutoff"]
        table_header2 = [
            "Bin",
            "Resolution Range",
            "Completeness",
            "",
            "",
            "cutoff",
            "(um)",
            "",
        ]

        # Parse the command line
        params, options, all_paths = self.parser.parse_args(
            show_diff_phil=False, return_unhandled=True)
        exp_paths = []
        refl_paths = []
        for path in all_paths:
            exps, refs = find_files(path, "integrated")
            exp_paths.extend(exps)
            refl_paths.extend(refs)
        assert len(exp_paths) == len(refl_paths)

        best_data = {}
        best_limits = flex.double()
        for exp_path, refl_path in zip(exp_paths, refl_paths):
            experiments = ExperimentListFactory.from_json_file(
                exp_path, check_format=False)
            reflections = easy_pickle.load(refl_path)
            exp_name = os.path.basename(exp_path)
            if exp_name.startswith("idx-") and exp_name.endswith(
                    "_refined_experiments.json"):
                # slice, not strip: lstrip/rstrip remove character sets, not substrings
                tag = exp_name[len("idx-"):-len("_refined_experiments.json")]
            else:
                tag = "%s, %s" % (exp_path, refl_path)

            for exp_id, experiment in enumerate(experiments):
                print("*" * 80)
                print("Data table for", tag)
                table_data = []
                table_data.append(table_header)
                table_data.append(table_header2)

                crystal = experiment.crystal
                refls = reflections.select(reflections["id"] == exp_id)
                sym = symmetry(
                    unit_cell=crystal.get_unit_cell(),
                    space_group=crystal.get_space_group(),
                )
                d = crystal.get_unit_cell().d(refls["miller_index"])
                mset = sym.miller_set(
                    indices=refls["miller_index"].select(d >= params.d_min),
                    anomalous_flag=False,
                )
                binner = mset.setup_binner(n_bins=params.n_bins)
                acceptable_resolution_bins = []
                for i in binner.range_used():
                    d_max, d_min = binner.bin_d_range(i)
                    sel = (d <= d_max) & (d > d_min)
                    sel &= refls["intensity.sum.value"] > 0
                    bin_refls = refls.select(sel)
                    n_refls = len(bin_refls)
                    avg_i = (flex.mean(bin_refls["intensity.sum.value"])
                             if n_refls > 0 else 0)
                    avg_i_sigi = (flex.mean(
                        bin_refls["intensity.sum.value"] /
                        flex.sqrt(bin_refls["intensity.sum.variance"]))
                                  if n_refls > 0 else 0)
                    acceptable_resolution_bins.append(
                        avg_i_sigi >= params.sig_filter_sigma)

                    bright_refls = bin_refls.select(
                        (bin_refls["intensity.sum.value"] /
                         flex.sqrt(bin_refls["intensity.sum.variance"])
                         ) >= params.sig_filter_sigma)
                    n_bright = len(bright_refls)

                    rmsd_obs = (1000 * math.sqrt(
                        (bright_refls["xyzcal.mm"] -
                         bright_refls["xyzobs.mm.value"]).sum_sq() / n_bright)
                                if n_bright > 0 else 0)

                    table_row = []
                    table_row.append("%3d" % i)
                    table_row.append("%-13s" % binner.bin_legend(
                        i_bin=i,
                        show_bin_number=False,
                        show_bin_range=False,
                        show_d_range=True,
                        show_counts=False,
                    ))
                    table_row.append("%13s" % binner.bin_legend(
                        i_bin=i,
                        show_bin_number=False,
                        show_bin_range=False,
                        show_d_range=False,
                        show_counts=True,
                    ))

                    table_row.append("%.1f" % (avg_i))
                    table_row.append("%.1f" % (avg_i_sigi))
                    table_row.append("%3d" % n_bright)
                    table_row.append("%.1f" % (rmsd_obs))
                    table_data.append(table_row)

                acceptable_resolution_bins = [
                    acceptable_resolution_bins[i]
                    for i in range(len(acceptable_resolution_bins))
                    if False not in acceptable_resolution_bins[:i + 1]
                ]

                for b, row in zip(acceptable_resolution_bins, table_data[2:]):
                    if b:
                        row.append("X")
                print(
                    table_utils.format(table_data,
                                       has_header=2,
                                       justify="center",
                                       delim=" "))
                print(
                    tag,
                    "unit cell:",
                    ", ".join([
                        "%.2f" % p
                        for p in crystal.get_unit_cell().parameters()
                    ]),
                    crystal.get_space_group().info(),
                )

                if any(acceptable_resolution_bins):
                    best_index = acceptable_resolution_bins.count(True) - 1
                    best_row = table_data[best_index + 2]
                    d_min = binner.bin_d_range(
                        binner.range_used()[best_index])[1]
                    if len(best_limits) < params.best_count:
                        best_limits.append(d_min)
                        best_data[tag] = d_min, best_row
                    elif (d_min < best_limits).count(True) > 0:
                        worst_d_min = flex.max(best_limits)
                        for t, data in best_data.items():
                            if worst_d_min == data[0]:
                                best_data[t] = d_min, best_row
                                best_limits[flex.first_index(
                                    best_limits, worst_d_min)] = d_min
                                break
                    print(tag, "best row:", " ".join(best_row))
                else:
                    print("Data didn't pass cutoff")
        if len(best_limits) > 0:
            print("*" * 80)
            print("Top", len(best_limits))
            for tag, data in best_data.items():
                print(tag, " ".join(data[1]))
Example #13
    def __call__(self):
        from iotbx.detectors.cspad_detector_formats import reverse_timestamp
        run_numbers = [r.run for r in self.trial.runs]
        assert self.run.run in run_numbers
        rungroup_ids = [rg.id for rg in self.trial.rungroups]
        assert self.rungroup.id in rungroup_ids
        isoforms = self.trial.isoforms
        assert len(isoforms) > 0
        low_res_bin_ids = []
        high_res_bin_ids = []
        for isoform in isoforms:
            bins = isoform.cell.bins
            d_mins = [float(b.d_min) for b in bins]
            low_res_bin_ids.append(str(bins[d_mins.index(max(d_mins))].id))
            if self.d_min is None:
                min_bin_index = d_mins.index(min(d_mins))
            else:
                d_maxes = [float(b.d_max) for b in bins]
                qualified_bin_indices = [
                    i for i in range(len(bins))
                    if d_maxes[i] >= self.d_min and d_mins[i] <= self.d_min
                ]
                assert len(qualified_bin_indices) == 1
                min_bin_index = qualified_bin_indices[0]
            high_res_bin_ids.append(str(bins[min_bin_index].id))
        assert len(low_res_bin_ids) > 0
        assert len(high_res_bin_ids) > 0
        assert len(low_res_bin_ids) == len(high_res_bin_ids)

        tag = self.app.params.experiment_tag

        # Get the high and low res avg_i_sigi in one query. Means there will be 2x timestamps retrieved, where each is found twice
        query = """SELECT bin.id, event.timestamp, event.n_strong, cb.avg_i_sigi, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               JOIN `%s_imageset` imgset ON imgset.id = is_e.imageset_id
               JOIN `%s_experiment` exp ON exp.imageset_id = imgset.id
               JOIN `%s_crystal` crystal ON crystal.id = exp.crystal_id
               JOIN `%s_cell` cell ON cell.id = crystal.cell_id
               JOIN `%s_bin` bin ON bin.cell_id = cell.id
               JOIN `%s_cell_bin` cb ON cb.bin_id = bin.id AND cb.crystal_id = crystal.id
               WHERE event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d AND
                     cb.bin_id IN (%s)
            """ % (tag, tag, tag, tag, tag, tag, tag, tag, self.trial.id,
                   self.run.id, self.rungroup.id,
                   ", ".join(low_res_bin_ids + high_res_bin_ids))
        cursor = self.app.execute_query(query)
        timestamps = flex.double()
        n_strong = flex.int()
        average_i_sigi_low = flex.double()
        average_i_sigi_high = flex.double()
        two_theta_low = flex.double()
        two_theta_high = flex.double()
        for row in cursor.fetchall():
            b_id, ts, n_s, avg_i_sigi, tt_low, tt_high = row
            rts = reverse_timestamp(ts)
            rts = rts[0] + (rts[1] / 1000)
            if rts not in timestamps:
                # First time through, figure out which bin is reported (high or low), add avg_i_sigi to that set of results
                timestamps.append(rts)
                n_strong.append(n_s)
                two_theta_low.append(tt_low or -1)
                two_theta_high.append(tt_high or -1)
                if str(b_id) in low_res_bin_ids:
                    average_i_sigi_low.append(avg_i_sigi or 1e-6)
                    average_i_sigi_high.append(0)
                elif str(b_id) in high_res_bin_ids:
                    average_i_sigi_low.append(0)
                    average_i_sigi_high.append(avg_i_sigi or 0)
                else:
                    assert False
            else:
                # Second time through, already have added to timestamps and n_strong, so fill in missing avg_i_sigi
                index = flex.first_index(timestamps, rts)
                if str(b_id) in low_res_bin_ids:
                    average_i_sigi_low[index] = avg_i_sigi
                elif str(b_id) in high_res_bin_ids:
                    average_i_sigi_high[index] = avg_i_sigi or 0
                else:
                    assert False

        # This left join query finds the events with no imageset, meaning they failed to index
        query = """SELECT event.timestamp, event.n_strong, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               LEFT JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               WHERE is_e.event_id IS NULL AND
                     event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d
            """ % (tag, tag, self.trial.id, self.run.id, self.rungroup.id)

        cursor = self.app.execute_query(query)
        for row in cursor.fetchall():
            ts, n_s, tt_low, tt_high = row
            rts = reverse_timestamp(ts)
            timestamps.append(rts[0] + (rts[1] / 1000))
            n_strong.append(n_s)
            average_i_sigi_low.append(0)
            average_i_sigi_high.append(0)
            two_theta_low.append(tt_low or -1)
            two_theta_high.append(tt_high or -1)

        order = flex.sort_permutation(timestamps)
        timestamps = timestamps.select(order)
        n_strong = n_strong.select(order)
        average_i_sigi_low = average_i_sigi_low.select(order)
        average_i_sigi_high = average_i_sigi_high.select(order)
        two_theta_low = two_theta_low.select(order)
        two_theta_high = two_theta_high.select(order)

        return timestamps, two_theta_low, two_theta_high, n_strong, average_i_sigi_low, average_i_sigi_high
Example #14
  def imgCIF_H(cbf_handle):
    '''Initialize a goniometer model from an imgCIF file handle, where
    it is assumed that the file has already been read.'''

    # find the goniometer axes and dependencies
    from scitbx.array_family import flex
    axis_names = flex.std_string()
    depends_on = flex.std_string()
    axes = flex.vec3_double()
    angles = flex.double()
    scan_axis = None
    cbf_handle.find_category("axis")
    for i in range(cbf_handle.count_rows()):
      cbf_handle.find_column("equipment")
      if cbf_handle.get_value() == "goniometer":
        cbf_handle.find_column("id")
        axis_names.append(cbf_handle.get_value())
        axis = []
        for i in range(3):
          cbf_handle.find_column("vector[%i]" %(i+1))
          axis.append(float(cbf_handle.get_value()))
        axes.append(axis)
        cbf_handle.find_column("depends_on")
        depends_on.append(cbf_handle.get_value())
      cbf_handle.next_row()

    # find the starting angles of each goniometer axis and figure out which one
    # is the scan axis (i.e. non-zero angle_increment)
    cbf_handle.find_category("diffrn_scan_axis")
    for i in range(cbf_handle.count_rows()):
      cbf_handle.find_column("axis_id")
      axis_name = cbf_handle.get_value()
      if axis_name not in axis_names:
        cbf_handle.next_row()
        continue
      cbf_handle.find_column("angle_start")
      axis_angle = float(cbf_handle.get_value())
      cbf_handle.find_column("angle_increment")
      increment = float(cbf_handle.get_value())
      angles.append(axis_angle)
      if abs(increment) > 0:
        assert scan_axis is None, "More than one scan axis is defined: not currently supported"
        scan_axis = flex.first_index(axis_names, axis_name)
      cbf_handle.next_row()
    assert axes.size() == angles.size()
    assert scan_axis is not None

    # figure out the order of the axes from the depends_on values
    order = flex.size_t()
    for i in range(axes.size()):
      if depends_on[i] == '.':
        o = 0
      else:
        o = flex.first_index(axis_names, depends_on[i])+1
      assert o not in order
      order.append(o)

    # multi-axis gonio requires axes in order as viewed from crystal to gonio base
    # i.e. the reverse of the order we have from cbf header
    order = order.reversed()
    axes = axes.select(order)
    angles = angles.select(order)
    axis_names = axis_names.select(order)
    scan_axis = axes.size() - scan_axis - 1

    # construct a multi-axis goniometer
    gonio = goniometer_factory.multi_axis(axes, angles, axis_names, scan_axis)
    return gonio
Example #15
def find_relatives(ids, cc_min, cc_max, rmax, codes, moments, nmax=10):
    indices = flex.int()
    idlist = open('id_list.txt', 'r')
    for id in idlist:
        id = id[0:4]
        indices.append(flex.first_index(codes, id))
    r_max = easy_pickle.load(prefix + 'pisa.rmax')
    nns = easy_pickle.load(prefix + 'pisa.nn')
    nn_array = math.nl_array(nmax)
    nn_indx = nn_array.nl()
    nn_total = nn_indx.size()
    q_array = flex.double(range(501)) / 2000.0

    ref_nlm_array = math.nlm_array(nmax)
    target_nlm_array = math.nlm_array(nmax)
    nlm = ref_nlm_array.nlm()
    coef_size = nlm.size()
    all_indices = range(codes.size())

    small_q_array = flex.double(range(51)) / 300.0
    mean = []
    sig = []
    for indx in indices:
        print indx
        #rmax = 50.0 #r_max[indx]
        ref_coef = moments[indx]
        ref_nlm_array.load_coefs(nlm, ref_coef[0:coef_size])
        z_model = zernike_model(ref_nlm_array, q_array, rmax, nmax)
        out_name = codes[indx] + "_.qi"
        nn_array.load_coefs(nn_indx, nns[indx][0:nn_total])
        ref_int = put_intensity(z_model, q_array, nn_array, out_name)
        mean_r = ref_int * 0.0
        sig_r = ref_int * 0.0
        small_z_model = zernike_model(ref_nlm_array, small_q_array, rmax, nmax)
        small_ref_int = small_z_model.calc_intensity(nn_array)
        small_ref_int = small_ref_int / small_ref_int[0]
        N = 0.0
        for coef, ii in zip(moments, all_indices):
            if N > 25: break
            target_nlm_array.load_coefs(nlm, coef[0:coef_size])
            align_obj = fft_align.align(ref_nlm_array,
                                        target_nlm_array,
                                        nmax=nmax,
                                        topn=10,
                                        refine=False)
            cc = align_obj.get_cc()
            if (cc >= cc_min and cc <= cc_max):
                N += 1
                nn_array.load_coefs(nn_indx, nns[ii][0:nn_total])
                opt_r_obj = optimize_r(nn_array, small_ref_int, small_q_array,
                                       nmax)
                opt_r = gss(opt_r_obj.target, rmax * 0.8, rmax * 1.2)
                z_model = zernike_model(ref_nlm_array, q_array, opt_r, nmax)
                out_name = codes[indx] + "_" + codes[ii] + ".qi.rel"
                mod_int = put_intensity(z_model, q_array, nn_array, out_name,
                                        ref_int)
                out_name = codes[indx] + "_" + codes[ii] + ".qi"
                put_intensity(z_model, q_array, nn_array, out_name)
                mod_int = mod_int - 1.0
                mean_r += mod_int
                sig_r += mod_int * mod_int
                print ii, cc, codes[ii], opt_r
        if N > 3:
            mean_r /= N
            sig_r = sig_r / N - mean_r * mean_r
            mean.append(mean_r)
            sig.append(sig_r)

    N = len(mean)
    if N > 0:
        mean_r = mean[0] * 0.0
        s_r = mean[0] * 0.0
        for uu in range(N):
            mean_r += mean[uu]
            s_r += sig[uu]
        mean_r /= N
        s_r /= N
        s_r = flex.sqrt(s_r)
        f = open('q_m_s_%s.dat' % rmax, 'w')
        for q, m, s in zip(q_array, mean_r, s_r):
            print >> f, q, m, s
Example #16
    def cluster_analysis(self):
        from cctbx.sgtbx import cosets

        if self.params.cluster.method == 'dbscan':
            self.dbscan_clustering()
        elif self.params.cluster.method == 'bisect':
            self.bisect_clustering()
        elif self.params.cluster.method == 'minimize_divide':
            self.minimize_divide_clustering()
        elif self.params.cluster.method == 'agglomerative':
            self.agglomerative_clustering()
        elif self.params.cluster.method == 'seed':
            self.seed_clustering()

        # Number of clusters in labels, ignoring noise if present.
        n_clusters = len(set(
            self.cluster_labels)) - (1 if -1 in self.cluster_labels else 0)

        cluster_miller_arrays = []

        space_groups = []

        sym_ops = [
            sgtbx.rt_mx(s).new_denominators(1, 12)
            for s in self.target.get_sym_ops()
        ]
        self.space_groups = space_groups

        reindexing_ops = {}
        space_groups = {}

        for dataset_id in range(len(self.datasets)):
            sg = copy.deepcopy(self.input_space_group)
            ref_sym_op_id = None
            ref_cluster_id = None
            for sym_op_id in range(len(sym_ops)):
                i_cluster = self.cluster_labels[len(self.datasets) * sym_op_id
                                                + dataset_id]
                if i_cluster < 0:
                    continue
                if ref_sym_op_id is None:
                    ref_sym_op_id = sym_op_id
                    ref_cluster_id = i_cluster
                    continue
                op = sym_ops[ref_sym_op_id].inverse().multiply(
                    sym_ops[sym_op_id])
                if i_cluster == ref_cluster_id:
                    sg.expand_smx(op.new_denominators(1, 12))

            sg.make_tidy()
            space_groups[dataset_id] = sg

            coset = cosets.left_decomposition(
                self.target._lattice_group,
                sg.info().primitive_setting().group())

            reindexing_ops[dataset_id] = {}

            for i_cluster in range(n_clusters):
                isel = (self.cluster_labels == i_cluster).iselection()
                dataset_ids = isel % len(self.datasets)
                idx = flex.first_index(dataset_ids, dataset_id)
                sel = (dataset_ids == dataset_id).iselection()
                if idx >= 0:
                    sym_op_id = isel[idx] // len(self.datasets)
                for s in sel:
                    sym_op_id = isel[s] // len(self.datasets)

                    for partition in coset.partitions:
                        if sym_ops[sym_op_id] in partition:
                            if i_cluster not in reindexing_ops[dataset_id]:
                                reindexing_ops[dataset_id][
                                    i_cluster] = partition[0].as_xyz()
                            #else:
                            #assert reindexing_ops[dataset_id][i_cluster] == partition[0].as_xyz()

        self.space_groups = space_groups
        self.reindexing_ops = reindexing_ops
Example #17
def run(args, imageset=None):
    # Parse input
    try:
        len(args)
    except Exception:
        params = args
    else:
        user_phil = []
        for arg in args:
            if "=" in arg:
                try:
                    user_phil.append(libtbx.phil.parse(arg))
                except RuntimeError as e:
                    raise Sorry("Unrecognized argument '%s' (error: %s)" %
                                (arg, str(e)))
            else:
                try:
                    user_phil.append(
                        libtbx.phil.parse("""file_path=%s""" % arg))
                except ValueError:
                    raise Sorry("Unrecognized argument '%s'" % arg)
        params = master_phil.fetch(sources=user_phil).extract()
    if imageset is None:
        if (params.file_path is None or len(params.file_path) == 0
                or not all(os.path.isfile(f) for f in params.file_path)):
            master_phil.show()
            raise Usage(
                "file_path must be defined (either file_path=XXX, or the path alone)."
            )
    assert params.n_bins is not None
    assert params.verbose is not None
    assert params.output_bins is not None

    # Allow writing to a file instead of stdout
    if params.output_file is None:
        logger = sys.stdout
    else:
        logger = open(params.output_file, "w")
        logger.write("%s " % params.output_file)

    if params.show_plots:
        from matplotlib import pyplot as plt

        colormap = plt.cm.gist_ncar
        plt.gca().set_color_cycle(
            [colormap(i) for i in np.linspace(0, 0.9, len(params.file_path))])

    if params.mask is not None:
        params.mask = easy_pickle.load(params.mask)

    if imageset is None:
        iterable = params.file_path

        def load_func(x):
            try:
                obj = dxtbx.datablock.DataBlockFactory.from_filenames(
                    [x])[0].extract_imagesets()[0]
            except IndexError:
                try:
                    obj = dxtbx.datablock.DataBlockFactory.from_json_file(
                        x)[0].extract_imagesets()[0]
                except dxtbx.datablock.InvalidDataBlockError:
                    obj = ExperimentListFactory.from_json_file(x)[0].imageset
            return obj

    else:
        iterable = [imageset]

        def load_func(x):
            return x

    # Iterate over each file provided
    for item in iterable:
        iset = load_func(item)
        n_images = len(iset)
        if params.image_number is None:
            if params.max_images is None:
                subiterable = range(n_images)
            else:
                subiterable = range(0, min(params.max_images, n_images))
        else:
            subiterable = [params.image_number]
        for image_number in subiterable:
            beam = iset.get_beam(image_number)
            detector = iset.get_detector(image_number)
            s0 = col(beam.get_s0())

            # Search the detector for the panel farthest from the beam. The
            # number of bins in the radial average will be equal to the
            # farthest point from the beam on the detector, in pixels, unless
            # overridden at the command line
            panel_res = [p.get_max_resolution_at_corners(s0) for p in detector]
            farthest_panel = detector[panel_res.index(min(panel_res))]
            size2, size1 = farthest_panel.get_image_size()
            corners = [(0, 0), (size1 - 1, 0), (0, size2 - 1),
                       (size1 - 1, size2 - 1)]
            corners_lab = [
                col(farthest_panel.get_pixel_lab_coord(c)) for c in corners
            ]
            corner_two_thetas = [
                farthest_panel.get_two_theta_at_pixel(s0, c) for c in corners
            ]
            extent_two_theta = max(corner_two_thetas)
            max_corner = corners_lab[corner_two_thetas.index(extent_two_theta)]
            extent = int(
                math.ceil(max_corner.length() * math.sin(extent_two_theta) /
                          max(farthest_panel.get_pixel_size())))
            extent_two_theta *= 180 / math.pi

            if params.n_bins < extent:
                params.n_bins = extent

            # These arrays will store the radial average info
            sums = flex.double(params.n_bins) * 0
            sums_sq = flex.double(params.n_bins) * 0
            counts = flex.int(params.n_bins) * 0

            all_data = iset[image_number]

            if not isinstance(all_data, tuple):
                all_data = (all_data, )

            for tile, (panel, data) in enumerate(zip(detector, all_data)):
                if params.panel is not None and tile != params.panel:
                    continue

                if params.mask is None:
                    mask = flex.bool(flex.grid(data.focus()), True)
                else:
                    mask = params.mask[tile]

                if hasattr(data, "as_double"):
                    data = data.as_double()

                logger.flush()
                if params.verbose:
                    logger.write("Average intensity tile %d: %9.3f\n" %
                                 (tile, flex.mean(data)))
                    logger.write("N bins: %d\n" % params.n_bins)
                    logger.flush()

                x1, y1, x2, y2 = (
                    0,
                    0,
                    panel.get_image_size()[1],
                    panel.get_image_size()[0],
                )
                bc = panel.get_beam_centre_px(beam.get_s0())
                bc = int(round(bc[1])), int(round(bc[0]))

                # compute the average
                radial_average(
                    data,
                    mask,
                    bc,
                    sums,
                    sums_sq,
                    counts,
                    panel.get_pixel_size()[0],
                    panel.get_distance(),
                    (x1, y1),
                    (x2, y2),
                )

            # average the results, avoiding division by zero
            results = sums.set_selected(counts <= 0, 0)
            results /= counts.set_selected(counts <= 0, 1).as_double()

            if params.median_filter_size is not None:
                logger.write(
                    "WARNING, the median filter is not fully propagated to the variances\n"
                )
                from scipy.ndimage.filters import median_filter

                results = flex.double(
                    median_filter(results.as_numpy_array(),
                                  size=params.median_filter_size))

            # calculate standard devations
            stddev_sel = ((sums_sq - sums * results) >= 0) & (counts > 0)
            std_devs = flex.double(len(sums), 0)
            std_devs.set_selected(
                stddev_sel,
                (sums_sq.select(stddev_sel) -
                 sums.select(stddev_sel) * results.select(stddev_sel)) /
                counts.select(stddev_sel).as_double(),
            )
            std_devs = flex.sqrt(std_devs)

            twotheta = (flex.double(range(len(results))) * extent_two_theta /
                        params.n_bins)
            q_vals = (4 * math.pi * flex.sin(math.pi * twotheta / 360) /
                      beam.get_wavelength())
            # nlmbda = 2dsin(theta)
            resolution = flex.double(len(twotheta), 0)
            nonzero = twotheta > 0
            resolution.set_selected(
                nonzero,
                beam.get_wavelength() / (2 * flex.asin(
                    (math.pi / 180) * twotheta.select(nonzero) / 2)),
            )

            if params.low_max_two_theta_limit is None:
                subset = results
            else:
                subset = results.select(
                    twotheta >= params.low_max_two_theta_limit)

            max_result = flex.max(subset)

            if params.x_axis == "two_theta":
                xvals = twotheta
                max_x = twotheta[flex.first_index(results, max_result)]
            elif params.x_axis == "q":
                xvals = q_vals
                max_x = q_vals[flex.first_index(results, max_result)]
            elif params.x_axis == "resolution":
                xvals = resolution
                max_x = resolution[flex.first_index(results, max_result)]

            for i, r in enumerate(results):
                val = xvals[i]
                if params.output_bins and "%.3f" % r != "nan":
                    # logger.write("%9.3f %9.3f\n"%     (val,r))        #.xy  format for Rex.cell.
                    logger.write(
                        "%9.3f %9.3f %9.3f\n" %
                        (val, r, std_devs[i]))  # .xye format for GSASII
                # logger.write("%.3f %.3f %.3f\n"%(val,r,ds[i]))  # include calculated d spacings
            logger.write("Maximum %s: %f, value: %f\n" %
                         (params.x_axis, max_x, max_result))

            if params.show_plots:
                if params.plot_x_max is not None:
                    results = results.select(xvals <= params.plot_x_max)
                    xvals = xvals.select(xvals <= params.plot_x_max)
                if params.normalize:
                    plt.plot(
                        xvals.as_numpy_array(),
                        (results / flex.max(results)).as_numpy_array(),
                        "-",
                    )
                else:
                    plt.plot(xvals.as_numpy_array(), results.as_numpy_array(),
                             "-")
                if params.x_axis == "two_theta":
                    plt.xlabel("2 theta")
                elif params.x_axis == "q":
                    plt.xlabel("q")
                elif params.x_axis == "resolution":
                    plt.xlabel("Resolution ($\\AA$)")
                    plt.gca().set_xscale("log")
                    plt.gca().invert_xaxis()
                    plt.xlim(0, 50)
                plt.ylabel("Avg ADUs")
                if params.plot_y_max is not None:
                    plt.ylim(0, params.plot_y_max)

    if params.show_plots:
        # plt.legend([os.path.basename(os.path.splitext(f)[0]) for f in params.file_path], ncol=2)
        plt.show()

    return xvals, results
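For reference, the 2-theta-to-q and 2-theta-to-d conversions used above, written out as a minimal standalone sketch (scalar math only; the function names are illustrative, not part of the module):

import math

def twotheta_deg_to_q(twotheta_deg, wavelength):
    # q = 4*pi*sin(theta)/lambda, with theta = twotheta/2 in radians
    return 4 * math.pi * math.sin(math.radians(twotheta_deg) / 2) / wavelength

def twotheta_deg_to_d(twotheta_deg, wavelength):
    # Bragg's law: n*lambda = 2*d*sin(theta)  =>  d = lambda / (2*sin(theta))
    return wavelength / (2 * math.sin(math.radians(twotheta_deg) / 2))

For example, at wavelength 1.0 Å and 2-theta = 30 degrees, twotheta_deg_to_d gives approximately 1.93 Å.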
Exemple #18
0
    def seed_clustering(self):
        eps = 1e-6
        X_orig = self.coords.as_numpy_array()

        import numpy as np
        from scipy.cluster import hierarchy
        import scipy.spatial.distance as ssd
        from sklearn.neighbors import NearestNeighbors
        from sklearn import metrics

        # initialise cluster labels: -1 means the point is not yet assigned to a cluster
        self.cluster_labels = flex.int(self.coords.all()[0], -1)

        cluster_id = 0
        while self.cluster_labels.count(-1) > 0:
            dataset_ids = (flex.int_range(
                len(self.datasets) * len(self.target.get_sym_ops())) %
                           len(self.datasets)).as_numpy_array()
            coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()
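            # NB: dataset_ids[j] is the source dataset of coordinate row j in
            # the (n_datasets * n_sym_ops)-row coordinate matrix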

            # select only those points that don't already belong to a cluster
            sel = np.where(self.cluster_labels == -1)
            X = X_orig[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            # choose a high density point as seed for cluster
            nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                                    algorithm='brute',
                                    metric='cosine').fit(X)
            distances, indices = nbrs.kneighbors(X)
            average_distance = flex.double(
                [dist[1:].mean() for dist in distances])
            i = flex.min_index(average_distance)

            d_id = dataset_ids[i]
            cluster = np.array([coord_ids[i]])
            cluster_dataset_ids = np.array([d_id])
            xis = np.array([X[i]])

            for j in range(len(self.datasets) - 1):
                # select only those rows that don't correspond to a dataset already
                # present in current cluster
                sel = np.where(dataset_ids != d_id)
                X = X[sel]
                dataset_ids = dataset_ids[sel]
                coord_ids = coord_ids[sel]

                assert len(X) > 0

                # Find nearest neighbour in cosine-space to the current cluster centroid
                nbrs = NearestNeighbors(n_neighbors=min(1, len(X)),
                                        algorithm='brute',
                                        metric='cosine').fit(X)
                distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
                k = indices[0][0]
                d_id = dataset_ids[k]
                cluster = np.append(cluster, coord_ids[k])
                cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
                xis = np.append(xis, [X[k]], axis=0)

            # label this cluster
            self.cluster_labels.set_selected(flex.size_t(cluster.tolist()),
                                             cluster_id)
            cluster_id += 1

        if flex.max(self.cluster_labels) == 0:
            # assume single cluster
            return self.cluster_labels

        cluster_centroids = []
        X = self.coords.as_numpy_array()
        for i in set(self.cluster_labels):
            sel = (self.cluster_labels == i).iselection().as_numpy_array()
            cluster_centroids.append(X[sel].mean(axis=0))

        # hierarchical clustering of cluster centroids, using cosine metric
        dist_mat = ssd.pdist(cluster_centroids, metric='cosine')
        linkage_matrix = hierarchy.linkage(dist_mat, method='average')

        # compare valid equal-sized clustering using silhouette scores
        # https://en.wikipedia.org/wiki/Silhouette_(clustering)
        # http://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
        distances = linkage_matrix[:, 2]
        distances = np.insert(distances, 0, 0)
        silhouette_scores = flex.double()
        thresholds = flex.double()
        n_clusters = flex.size_t()
        for threshold in distances[1:]:
            cluster_labels = self.cluster_labels.deep_copy()
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold - eps,
                                        criterion='distance').tolist()
            counts = [labels.count(lbl) for lbl in set(labels)]
            if len(set(counts)) > 1:
                # only equal-sized clusters are valid
                continue

            n = len(set(labels))
            if n == 1:
                continue
            for i in range(len(labels)):
                cluster_labels.set_selected(self.cluster_labels == i,
                                            int(labels[i] - 1))
            # Compute the silhouette score for each sample; their mean is
            # equivalent to metrics.silhouette_score on the whole partition
            sample_silhouette_values = metrics.silhouette_samples(
                X, cluster_labels.as_numpy_array(), metric='cosine')
            silhouette_avg = sample_silhouette_values.mean()
            silhouette_scores.append(silhouette_avg)
            thresholds.append(threshold)
            n_clusters.append(n)

            count_negative = (sample_silhouette_values < 0).sum()
            logger.info('Clustering:')
            logger.info('  Number of clusters: %i' % n)
            logger.info('  Threshold score: %.3f (%.1f deg)' %
                        (threshold, math.degrees(math.acos(1 - threshold))))
            logger.info('  Silhouette score: %.3f' % silhouette_avg)
            logger.info('  -ve silhouette scores: %.1f%%' %
                        (100 * count_negative / sample_silhouette_values.size))

            if self.params.save_plot:
                plot_silhouette(sample_silhouette_values,
                                cluster_labels.as_numpy_array(),
                                file_name='%ssilhouette_%i.png' %
                                (self.params.plot_prefix, n))

        if self.params.cluster.seed.n_clusters is Auto:
            idx = flex.max_index(silhouette_scores)
        else:
            idx = flex.first_index(n_clusters,
                                   self.params.cluster.seed.n_clusters)
            if idx is None:
                raise Sorry('No valid clustering with %i clusters' %
                            self.params.cluster.seed.n_clusters)

        if (self.params.cluster.seed.n_clusters is Auto
                and silhouette_scores[idx] <
                self.params.cluster.seed.min_silhouette_score):
            # assume single cluster
            self.cluster_labels = flex.int(self.cluster_labels.size(), 0)
        else:
            threshold = thresholds[idx] - eps
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold,
                                        criterion='distance')
            cluster_labels = flex.int(self.cluster_labels.size(), -1)
            for i in range(len(labels)):
                cluster_labels.set_selected(self.cluster_labels == i,
                                            int(labels[i] - 1))
            self.cluster_labels = cluster_labels

        if self.params.save_plot:
            plot_matrix(1 - ssd.squareform(dist_mat),
                        linkage_matrix,
                        '%sseed_clustering_cos_angle_matrix.png' %
                        self.params.plot_prefix,
                        color_threshold=threshold)
            plot_dendrogram(linkage_matrix,
                            '%sseed_clustering_cos_angle_dendrogram.png' %
                            self.params.plot_prefix,
                            color_threshold=threshold)

        return self.cluster_labels
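The threshold scan above is a standard scipy/scikit-learn pattern: cut the dendrogram just below each merge height and keep the silhouette score of the resulting partition. A minimal self-contained sketch of that pattern on synthetic data (all names here are illustrative):

import numpy as np
from scipy.cluster import hierarchy
import scipy.spatial.distance as ssd
from sklearn import metrics

rng = np.random.RandomState(0)
# two direction-based clusters, so the cosine metric separates them
X = np.vstack([rng.normal((1, 0, 0), 0.05, (10, 3)),
               rng.normal((0, 1, 0), 0.05, (10, 3))])

dist_mat = ssd.pdist(X, metric='cosine')
linkage_matrix = hierarchy.linkage(dist_mat, method='average')

# cut just below each merge height and score the resulting partition
for threshold in linkage_matrix[:, 2]:
    labels = hierarchy.fcluster(linkage_matrix, threshold - 1e-6,
                                criterion='distance')
    n = len(set(labels))
    if n < 2 or n >= len(X):
        continue  # silhouette_score needs 2 <= n_clusters <= n_samples - 1
    score = metrics.silhouette_score(X, labels, metric='cosine')
    print(n, round(score, 3))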
Exemple #19
0
def run(args, image=None):
  from xfel import radial_average
  from scitbx.array_family import flex
  import os, sys
  import dxtbx

  # Parse input
  try:
    n = len(args)
  except Exception:
    params = args
  else:
    user_phil = []
    for arg in args:
      if (not "=" in arg):
        try :
          user_phil.append(libtbx.phil.parse("""file_path=%s""" % arg))
        except ValueError:
          raise Sorry("Unrecognized argument '%s'" % arg)
      else:
        try:
          user_phil.append(libtbx.phil.parse(arg))
        except RuntimeError as e:
          raise Sorry("Unrecognized argument '%s' (error: %s)" % (arg, str(e)))
    params = master_phil.fetch(sources=user_phil).extract()
  if image is None:
    if params.file_path is None or len(params.file_path) == 0 or not all([os.path.isfile(f) for f in params.file_path]):
      master_phil.show()
      raise Usage("file_path must be defined (either file_path=XXX, or the path alone).")
  assert params.n_bins is not None
  assert params.verbose is not None
  assert params.output_bins is not None

  # Allow writing to a file instead of stdout
  if params.output_file is None:
    logger = sys.stdout
  else:
    logger = open(params.output_file, 'w')
    logger.write("%s "%params.output_file)

  if params.show_plots:
    from matplotlib import pyplot as plt
    import numpy as np
    colormap = plt.cm.gist_ncar
    plt.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, len(params.file_path))])

  if params.mask is not None:
    params.mask = easy_pickle.load(params.mask)

  if image is None:
    iterable = params.file_path
    load_func = lambda x: dxtbx.load(x)
  else:
    iterable = [image]
    load_func = lambda x: x

  # Iterate over each file provided
  for item in iterable:
    img = load_func(item)
    try:
      n_images = img.get_num_images()
      subiterable = xrange(n_images)
    except AttributeError:
      n_images = None
      subiterable = [0]
    for image_number in subiterable:
      if n_images is None:
        beam = img.get_beam()
        detector = img.get_detector()
      else:
        beam = img.get_beam(image_number)
        detector = img.get_detector(image_number)
      s0 = col(beam.get_s0())

      # Search the detector for the panel farthest from the beam. The number of bins in the radial average will be
      # equal to the farthest point from the beam on the detector, in pixels, unless overridden at the command line
      panel_res = [p.get_max_resolution_at_corners(s0) for p in detector]
      farthest_panel = detector[panel_res.index(min(panel_res))]
      size2, size1 = farthest_panel.get_image_size()
      corners = [(0,0), (size1-1,0), (0,size2-1), (size1-1,size2-1)]
      corners_lab = [col(farthest_panel.get_pixel_lab_coord(c)) for c in corners]
      corner_two_thetas = [farthest_panel.get_two_theta_at_pixel(s0, c) for c in corners]
      extent_two_theta = max(corner_two_thetas)
      max_corner = corners_lab[corner_two_thetas.index(extent_two_theta)]
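      # |max_corner| * sin(two_theta) is the radial distance in mm from the
      # beam axis to the farthest corner; dividing by the pixel size gives it
      # in pixels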
      extent = int(math.ceil(max_corner.length()*math.sin(extent_two_theta)/max(farthest_panel.get_pixel_size())))
      extent_two_theta *= 180/math.pi

      if params.n_bins < extent:
        params.n_bins = extent

      # These arrays will store the radial average info
      sums    = flex.double(params.n_bins, 0)
      sums_sq = flex.double(params.n_bins, 0)
      counts  = flex.int(params.n_bins, 0)

      if n_images is None:
        all_data = img.get_raw_data()
      else:
        all_data = img.get_raw_data(image_number)

      if not isinstance(all_data, tuple):
        all_data = (all_data,)

      for tile, (panel, data) in enumerate(zip(detector, all_data)):
        if params.mask is None:
          mask = flex.bool(flex.grid(data.focus()), True)
        else:
          mask = params.mask[tile]

        if hasattr(data,"as_double"):
          data = data.as_double()

        logger.flush()
        if params.verbose:
          logger.write("Average intensity tile %d: %9.3f\n"%(tile, flex.mean(data)))
          logger.write("N bins: %d\n"%params.n_bins)
          logger.flush()

        x1,y1,x2,y2 = 0,0,panel.get_image_size()[1],panel.get_image_size()[0]
        bc = panel.get_beam_centre_px(beam.get_s0())
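        # round to integer pixel indices, swapping the coordinate ordering to
        # what radial_average expects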
        bc = int(round(bc[1])), int(round(bc[0]))

        # compute the average
        radial_average(data,mask,bc,sums,sums_sq,counts,panel.get_pixel_size()[0],panel.get_distance(),
                       (x1,y1),(x2,y2))

      # average the results, avoiding division by zero
      results = sums.set_selected(counts <= 0, 0)
      results /= counts.set_selected(counts <= 0, 1).as_double()

      if params.median_filter_size is not None:
        logger.write("WARNING, the median filter is not fully propogated to the variances\n")
        from scipy.ndimage.filters import median_filter
        results = flex.double(median_filter(results.as_numpy_array(), size = params.median_filter_size))

      # calculate standard deviations
      stddev_sel = ((sums_sq - sums * results) >= 0) & (counts > 0)
      std_devs = flex.double(len(sums), 0)
      std_devs.set_selected(
        stddev_sel,
        (sums_sq.select(stddev_sel) - sums.select(stddev_sel) *
         results.select(stddev_sel)) / counts.select(stddev_sel).as_double())
      std_devs = flex.sqrt(std_devs)

      twotheta = flex.double(xrange(len(results)))*extent_two_theta/params.n_bins
      q_vals = 4*math.pi*flex.sin(math.pi*twotheta/360)/beam.get_wavelength()

      if params.low_max_two_theta_limit is None:
        subset = results
      else:
        subset = results.select(twotheta >= params.low_max_two_theta_limit)

      max_result = flex.max(subset)

      if params.x_axis == 'two_theta':
        xvals = twotheta
        max_x = twotheta[flex.first_index(results, max_result)]
      elif params.x_axis == 'q':
        xvals = q_vals
        max_x = q_vals[flex.first_index(results, max_result)]

      for i in xrange(len(results)):
        val = xvals[i]
        if params.output_bins and "%.3f"%results[i] != "nan":
         #logger.write("%9.3f %9.3f\n"%     (val,results[i]))        #.xy  format for Rex.cell.
          logger.write("%9.3f %9.3f %9.3f\n"%(val,results[i],std_devs[i])) #.xye format for GSASII
         #logger.write("%.3f %.3f %.3f\n"%(val,results[i],ds[i]))  # include calculated d spacings
      logger.write("Maximum %s: %f, value: %f\n"%(params.x_axis, max_x, max_result))

      if params.show_plots:
        if params.plot_x_max is not None:
          results = results.select(xvals <= params.plot_x_max)
          xvals = xvals.select(xvals <= params.plot_x_max)
        if params.normalize:
          plt.plot(xvals.as_numpy_array(),(results/flex.max(results)).as_numpy_array(),'-')
        else:
          plt.plot(xvals.as_numpy_array(),results.as_numpy_array(),'-')
        if params.x_axis == 'two_theta':
          plt.xlabel("2 theta")
        elif params.x_axis == 'q':
          plt.xlabel("q")
        plt.ylabel("Avg ADUs")
        if params.plot_y_max is not None:
          plt.ylim(0, params.plot_y_max)

    if params.show_plots:
      #plt.legend([os.path.basename(os.path.splitext(f)[0]) for f in params.file_path], ncol=2)
      plt.show()

  return xvals, results
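A hedged usage sketch: bare arguments are treated as file paths, while anything containing '=' is parsed as a phil assignment, so a driver script might call (file name and parameter values illustrative):

xvals, results = run(["image_0001.cbf", "n_bins=1000", "x_axis=q",
                      "show_plots=False"])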
def get_closest_idx(data, val):
  from scitbx.array_family import flex
  deltas = flex.abs(data - val)
  return flex.first_index(deltas, flex.min(deltas))
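For example, with a flex.double array (values illustrative):

from scitbx.array_family import flex
data = flex.double([0.5, 1.2, 2.9, 4.1])
assert get_closest_idx(data, 3.0) == 2  # 2.9 is the value closest to 3.0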