def _goniometer(self):
    """Build a multi-axis goniometer from the CRYSTAL_GONIO_* header records.

    Only axes whose unit is "deg" are kept, and the remaining axes are
    handed to the factory in reverse header order. The rotation axis of the
    resulting model is then checked against the ROTATION_VECTOR record.
    """
    header = self._header_dictionary

    angle_values = [float(tok) for tok in header["CRYSTAL_GONIO_VALUES"].split()]
    axis_labels = [tok.strip() for tok in header["CRYSTAL_GONIO_NAMES"].split()]
    axis_units = [tok.strip() for tok in header["CRYSTAL_GONIO_UNITS"].split()]
    components = [float(tok) for tok in header["CRYSTAL_GONIO_VECTORS"].split()]
    # One 3-vector per unit entry, read consecutively from the flat list.
    axis_vectors = [
        matrix.col(components[3 * k:3 * k + 3]) for k in range(len(axis_units))
    ]
    scan_axis_name = header["ROTATION_AXIS_NAME"].strip()

    # Keep only the entries whose unit is 'deg' (which is probably all of them).
    in_degrees = [unit == "deg" for unit in axis_units]

    def degrees_only(sequence):
        return [item for item, keep in zip(sequence, in_degrees) if keep]

    angle_values = degrees_only(angle_values)
    axis_labels = degrees_only(axis_labels)
    axis_vectors = degrees_only(axis_vectors)

    # Multi-axis gonio requires axes in order as viewed from crystal to gonio
    # base. Assume the SMV header records them in reverse order.
    axes = flex.vec3_double(reversed(axis_vectors))
    names = flex.std_string(reversed(axis_labels))
    angles = flex.double(reversed(angle_values))
    scan_axis = flex.first_index(names, scan_axis_name)

    goniometer = self._goniometer_factory.make_multi_axis_goniometer(
        axes, angles, names, scan_axis
    )

    # The header also records the calculated rotation axis; use it as a
    # sanity check on the model that was just built.
    expected_axis = tuple(map(float, header["ROTATION_VECTOR"].split()))
    for from_header, from_model in zip(expected_axis, goniometer.get_rotation_axis()):
        assert abs(from_header - from_model) < 1e-6

    return goniometer
def _goniometer(self):
    """Construct a goniometer from the records in the mini CBF header.

    A kappa goniometer is built when the header has both "Alpha" and
    "Kappa" records; otherwise a three-axis (PHI, CHI, OMEGA) multi-axis
    goniometer is built.
    """
    header = self._cif_header_dictionary

    def first_number(key):
        # Header values look like "<number> <unit...>"; keep the number.
        return float(header[key].split()[0])

    if not ("Alpha" in header and "Kappa" in header):
        # Smargon
        phi = first_number("Phi")
        chi = first_number("Chi")
        omega = first_number("Omega")

        names = flex.std_string(("PHI", "CHI", "OMEGA"))
        axes = flex.vec3_double(((1, 0, 0), (0, 0, -1), (1, 0, 0)))
        angles = flex.double((phi, chi, omega))

        oscillation_axis = header["Oscillation_axis"].upper()
        assert oscillation_axis in names, oscillation_axis
        scan_axis = flex.first_index(names, oscillation_axis)
        return self._goniometer_factory.make_multi_axis_goniometer(
            axes, angles, names, scan_axis
        )

    # Kappa
    alpha = first_number("Alpha")
    # NOTE(review): omega is read from the "Chi" record here - confirm this
    # is intentional for this header format.
    omega = first_number("Chi")
    kappa = first_number("Kappa")
    phi = first_number("Phi")

    oscillation_axis = header["Oscillation_axis"]
    scan_axis_names = {"OMEGA": "Omega", "PHI": "Phi"}
    assert oscillation_axis in scan_axis_names

    # this is the direction the arm points in at datum
    arm_direction = "+z"
    return self._goniometer_factory.make_kappa_goniometer(
        alpha, omega, kappa, phi, arm_direction, scan_axis_names[oscillation_axis]
    )
def _goniometer(self):
    """Build a three-axis (PHI, CHI, OMEGA) goniometer from the header.

    The ANGLES record holds 2-theta, omega, phi, chi (FIXED), in that order.
    The AXIS record indexes into the scan-axis table below (FORTRAN
    counting) to name the scan axis; START and RANGE define the start and
    step size for each image.

    Returns:
        The object produced by
        ``self._goniometer_factory.make_multi_axis_goniometer``.
    """
    _, omega, phi, chi = map(float, self.header_dict["ANGLES"].split())

    scan_axis_table = ["NONE", "2THETA", "OMEGA", "PHI", "CHI", "X", "Y", "Z"]
    scan_axis_name = scan_axis_table[int(self.header_dict["AXIS"])]

    names = flex.std_string(("PHI", "CHI", "OMEGA"))
    scan_axis = flex.first_index(names, scan_axis_name)
    if scan_axis is None:
        # The header names an axis that is not a goniometer axis (e.g. NONE,
        # 2THETA, X). Default to OMEGA, expressed as an index into `names`
        # like every other value of scan_axis, not as the axis name.
        scan_axis = flex.first_index(names, "OMEGA")

    # https://journals.iucr.org/d/issues/2014/10/00/dz5309/dz5309sup1.pdf
    axes = flex.vec3_double(((0, -1, 0), (0, 0, 1), (0, 1, 0)))
    omega -= 180
    angles = flex.double((phi, chi, omega))

    return self._goniometer_factory.make_multi_axis_goniometer(
        axes, angles, names, scan_axis
    )
def _goniometer(self):
    """Construct a goniometer from the records in the mini CBF header.

    Headers with both 'Alpha' and 'Kappa' records give a kappa goniometer;
    any other header gives a PHI/CHI/OMEGA multi-axis goniometer.
    """
    records = self._cif_header_dictionary

    def leading_float(key):
        # Only the first whitespace-separated token of the record is numeric.
        return float(records[key].split()[0])

    is_kappa = 'Alpha' in records and 'Kappa' in records

    if is_kappa:
        # Kappa
        alpha = leading_float('Alpha')
        # NOTE(review): omega is taken from the 'Chi' record - confirm.
        omega = leading_float('Chi')
        kappa = leading_float('Kappa')
        phi = leading_float('Phi')

        axis = records['Oscillation_axis']
        kappa_scan_names = {'OMEGA': 'Omega', 'PHI': 'Phi'}
        assert axis in kappa_scan_names

        # this is the direction the arm points in at datum
        return self._goniometer_factory.make_kappa_goniometer(
            alpha, omega, kappa, phi, '+z', kappa_scan_names[axis])

    # Smargon
    from scitbx.array_family import flex

    phi = leading_float('Phi')
    chi = leading_float('Chi')
    omega = leading_float('Omega')

    names = flex.std_string(("PHI", "CHI", "OMEGA"))
    axes = flex.vec3_double(((1, 0, 0), (0, 0, -1), (1, 0, 0)))
    angles = flex.double((phi, chi, omega))

    axis = records['Oscillation_axis'].upper()
    assert axis in names, axis
    scan_axis = flex.first_index(names, axis)

    return self._goniometer_factory.make_multi_axis_goniometer(
        axes, angles, names, scan_axis)
def find_delta(rho_map, tol):
    """Find delta as hinted on fig. 1 of ref. [1] in module charge_flipping.

    The map values are scaled by their maximum and sorted. A straight line
    is fitted to the middle half of the sorted values, and the first and
    last positions whose deviation from that line is within ``tol`` bound
    the linear region.

    Returns:
        A pair ``(delta, max_rho)`` where delta is the smaller of the two
        magnitudes of the sorted values at the ends of the linear region,
        and max_rho is the maximum of the unscaled map.
    """
    density = rho_map.real_map_unpadded().as_1d()
    max_rho = flex.max(density)
    density /= max_rho

    ranked = density.select(flex.sort_permutation(density))
    size = len(ranked)

    # Fit on the central half of the sorted values only.
    first_quarter = size // 4
    third_quarter = 3 * size // 4
    positions = flex.double_range(first_quarter, third_quarter)
    central = ranked[first_quarter:third_quarter]

    correlation = flex.linear_correlation(positions, central)
    assert correlation.is_well_defined() and correlation.coefficient() > 0.99

    fit = flex.linear_regression(positions, central)
    intercept = fit.y_intercept()
    slope = fit.slope()

    residual = flex.abs(intercept + slope * flex.double_range(size) - ranked)
    outside_line = residual > tol
    low = flex.first_index(outside_line, False)
    high = flex.last_index(outside_line, False)

    # Nearly everything between the two ends must lie on the line.
    assert outside_line[low:high].count(False) / (high - low + 1) > 0.99
    assert ranked[low] < 0 and ranked[high] > 0

    delta = min(ranked[high], -ranked[low])
    return delta, max_rho
def _silhouette_analysis(self, cluster_labels, linkage_matrix, n_clusters,
                         min_silhouette_score):
    """Compare valid equal-sized clustering using silhouette scores.

    Every merge distance in ``linkage_matrix`` is tried as a cut threshold.
    A cut is only considered when it gives more than one cluster, all
    clusters have the same number of members and, if ``n_clusters`` is not
    Auto, the number of clusters equals ``n_clusters``. Each accepted cut is
    scored by its mean silhouette value (cosine metric) and logged.

    Args:
        cluster_labels (scitbx.array_family.flex.int): The initial label of
            each coordinate. Label ``i`` is remapped to the cluster assigned
            to entry ``i`` of the hierarchical clustering (presumably the
            i-th centroid of the initial clustering - TODO confirm).
        linkage_matrix (numpy.ndarray): The hierarchical clustering of
            centroids of the initial clustering as produced by
            :func:`scipy.cluster.hierarchy.linkage`.
        n_clusters (int): Optionally override the automatic determination of
            the number of clusters.
        min_silhouette_score (float): The minimum silhouette score to be used
            in automatic determination of the number of clusters.

    Returns:
        tuple: ``(cluster_labels, threshold)``. ``cluster_labels`` holds a
        label for each coordinate: a ``flex.int`` of zeros when a single
        cluster is assumed, otherwise a ``flex.double``. ``threshold`` is the
        cut distance used for the final labelling.

    Raises:
        Sorry: If ``n_clusters`` is given and no accepted cut has that many
            clusters.
    """
    eps = 1e-6
    X = self.coords.as_numpy_array()
    cluster_labels_input = cluster_labels
    distances = linkage_matrix[::, 2]
    distances = np.insert(distances, 0, 0)
    silhouette_scores = flex.double()
    thresholds = flex.double()
    threshold_n_clusters = flex.size_t()
    # distances[1:] skips the 0 inserted above, i.e. iterates the merge
    # distances of the linkage matrix.
    for threshold in distances[1:]:
        cluster_labels = cluster_labels_input.deep_copy()
        # Cut just below the merge distance so that this merge is not applied.
        labels = hierarchy.fcluster(linkage_matrix,
                                    threshold - eps,
                                    criterion="distance").tolist()
        counts = [labels.count(l) for l in set(labels)]
        if len(set(counts)) > 1:
            # only equal-sized clusters are valid
            continue

        n = len(set(labels))
        if n == 1:
            continue
        elif n_clusters is not Auto and n != n_clusters:
            continue
        # Map each initial label i onto the (zero-based) merged cluster id.
        for i in range(len(labels)):
            cluster_labels.set_selected(cluster_labels_input == i,
                                        int(labels[i] - 1))
        if len(set(cluster_labels)) == X.shape[0]:
            # silhouette coefficient not defined if 1 dataset per cluster
            # not sure what the default value should be
            sample_silhouette_values = np.full(cluster_labels.size(), 0)
        else:
            # Compute the silhouette scores for each sample
            sample_silhouette_values = metrics.silhouette_samples(
                X, cluster_labels.as_numpy_array(), metric="cosine")
        silhouette_avg = sample_silhouette_values.mean()
        silhouette_scores.append(silhouette_avg)
        thresholds.append(threshold)
        threshold_n_clusters.append(n)

        count_negative = (sample_silhouette_values < 0).sum()
        logger.info("Clustering:")
        logger.info(" Number of clusters: %i" % n)
        logger.info(" Threshold score: %.3f (%.1f deg)" %
                    (threshold, math.degrees(math.acos(1 - threshold))))
        logger.info(" Silhouette score: %.3f" % silhouette_avg)
        logger.info(" -ve silhouette scores: %.1f%%" %
                    (100 * count_negative / sample_silhouette_values.size))

    # NOTE(review): if no cut was accepted above, silhouette_scores is empty
    # when max_index is called - confirm callers cannot reach this state.
    if n_clusters is Auto:
        idx = flex.max_index(silhouette_scores)
    else:
        idx = flex.first_index(threshold_n_clusters, n_clusters)
        if idx is None:
            raise Sorry("No valid clustering with %i clusters" % n_clusters)

    if n_clusters is Auto and silhouette_scores[idx] < min_silhouette_score:
        # assume single cluster
        # NOTE(review): `threshold` returned below is then whatever the last
        # loop iteration left behind, not thresholds[idx] - confirm intended.
        cluster_labels = flex.int(cluster_labels.size(), 0)
    else:
        threshold = thresholds[idx] - eps
        labels = hierarchy.fcluster(linkage_matrix,
                                    threshold,
                                    criterion="distance")
        # -1 marks coordinates whose initial label has no entry in `labels`.
        cluster_labels = flex.double(cluster_labels.size(), -1)
        for i in range(len(labels)):
            cluster_labels.set_selected(cluster_labels_input == i,
                                        float(labels[i] - 1))

    return cluster_labels, threshold
def imgCIF_H(cbf_handle):
    """Initialize a goniometer model from an imgCIF file handle, where it is
    assumed that the file has already been read.

    Reads the goniometer rows of the "axis" category and the matching rows
    of the "diffrn_scan_axis" category, then builds a multi-axis goniometer
    with the axes in reverse dependency order.
    """
    axis_names = flex.std_string()
    depends_on = flex.std_string()
    axes = flex.vec3_double()
    angles = flex.double()
    scan_axis = None

    # Collect the goniometer axes and what each one depends on.
    cbf_handle.find_category(b"axis")
    for _ in range(cbf_handle.count_rows()):
        cbf_handle.find_column(b"equipment")
        if cbf_handle.get_value() == b"goniometer":
            cbf_handle.find_column(b"id")
            axis_names.append(cbf_handle.get_value())
            direction = []
            for component in (1, 2, 3):
                cbf_handle.find_column(b"vector[%i]" % component)
                direction.append(float(cbf_handle.get_value()))
            axes.append(direction)
            cbf_handle.find_column(b"depends_on")
            depends_on.append(cbf_handle.get_value())
        cbf_handle.next_row()

    # Collect the starting angle of each goniometer axis; the scan axis is
    # the one with a non-zero angle_increment.
    cbf_handle.find_category(b"diffrn_scan_axis")
    for _ in range(cbf_handle.count_rows()):
        cbf_handle.find_column(b"axis_id")
        axis_name = cbf_handle.get_value()
        if axis_name.decode() in axis_names:
            cbf_handle.find_column(b"angle_start")
            start_angle = float(cbf_handle.get_value())
            cbf_handle.find_column(b"angle_increment")
            step = float(cbf_handle.get_value())
            angles.append(start_angle)
            if abs(step) > 0:
                assert (
                    scan_axis is None
                ), "More than one scan axis is defined: not currently supported"
                scan_axis = flex.first_index(axis_names, axis_name)
        cbf_handle.next_row()

    assert axes.size() == angles.size()
    if scan_axis is None:
        # probably a still shot -> scan axis arbitrary as no scan
        scan_axis = 0

    # Work out the order of the axes from the depends_on values: an axis
    # that depends on "." goes first, otherwise it follows its parent.
    order = flex.size_t()
    for parent in depends_on:
        if parent == ".":
            position = 0
        else:
            position = flex.first_index(axis_names, parent) + 1
        assert position not in order
        order.append(position)

    # multi-axis gonio requires axes in order as viewed from crystal to gonio
    # base i.e. the reverse of the order we have from cbf header
    order = order.reversed()
    axes = axes.select(order)
    angles = angles.select(order)
    axis_names = axis_names.select(order)
    scan_axis = axes.size() - scan_axis - 1

    # construct a multi-axis goniometer
    return GoniometerFactory.multi_axis(axes, angles, axis_names, scan_axis)
def get_closest_idx(data, val):
    """Return the index of the first element of ``data`` closest to ``val``."""
    distance = flex.abs(data - val)
    smallest = flex.min(distance)
    return flex.first_index(distance, smallest)
def run(self):
    """Execute the script.

    For every experiment found under the paths given on the command line,
    print a per-resolution-bin table (mean I, mean I/sigma(I), number of
    bright reflections, RMSD between predicted and observed positions) and
    mark the leading bins whose mean I/sigma(I) passes
    ``params.sig_filter_sigma``. The ``params.best_count`` entries with the
    smallest d_min cutoff are kept and printed at the end.
    """
    import os, math
    from cctbx.crystal import symmetry
    from scitbx.array_family import flex
    from libtbx import table_utils, easy_pickle
    from xfel.command_line.cspad_cbf_metrology import find_files
    from dxtbx.model.experiment.experiment_list import ExperimentListFactory

    table_header = ["", "", "", "I", "IsigI", "N >", "RMSD", "Cutoff"]
    table_header2 = ["Bin", "Resolution Range", "Completeness", "", "",
                     "cutoff", "(um)", ""]

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(
        show_diff_phil=False, return_unhandled=True)

    exp_paths = []
    refl_paths = []
    for path in all_paths:
        exps, refs = find_files(path, "integrated")
        exp_paths.extend(exps)
        refl_paths.extend(refs)
    assert len(exp_paths) == len(refl_paths)

    tag_prefix = "idx-"
    tag_suffix = "_refined_experiments.json"

    best_data = {}
    best_limits = flex.double()
    for exp_path, refl_path in zip(exp_paths, refl_paths):
        experiments = ExperimentListFactory.from_json_file(exp_path)
        # NOTE: unpickling executes code from the file; only use on trusted
        # reflection files.
        reflections = easy_pickle.load(refl_path)
        exp_name = os.path.basename(exp_path)
        if exp_name.startswith(tag_prefix) and exp_name.endswith(tag_suffix):
            # Slice the prefix/suffix off. str.lstrip/rstrip take a *set of
            # characters* and would also eat leading/trailing characters of
            # the tag itself (e.g. "idx-dxi_7" -> "_7").
            tag = exp_name[len(tag_prefix):-len(tag_suffix)]
        else:
            tag = "%s, %s" % (exp_path, refl_path)

        for exp_id, experiment in enumerate(experiments):
            print("*" * 80)
            print("Data table for %s" % tag)
            table_data = [table_header, table_header2]

            crystal = experiment.crystal
            refls = reflections.select(reflections['id'] == exp_id)
            sym = symmetry(unit_cell=crystal.get_unit_cell(),
                           space_group=crystal.get_space_group())
            d = crystal.get_unit_cell().d(refls['miller_index'])
            mset = sym.miller_set(
                indices=refls['miller_index'].select(d >= params.d_min),
                anomalous_flag=False)
            binner = mset.setup_binner(n_bins=params.n_bins)

            acceptable_resolution_bins = []
            for i in binner.range_used():
                d_max, d_min = binner.bin_d_range(i)
                sel = (d <= d_max) & (d > d_min)
                sel &= refls['intensity.sum.value'] > 0
                bin_refls = refls.select(sel)
                n_refls = len(bin_refls)

                i_over_sigma = (bin_refls['intensity.sum.value'] /
                                flex.sqrt(bin_refls['intensity.sum.variance']))
                avg_i = (flex.mean(bin_refls['intensity.sum.value'])
                         if n_refls > 0 else 0)
                avg_i_sigi = flex.mean(i_over_sigma) if n_refls > 0 else 0
                acceptable_resolution_bins.append(
                    avg_i_sigi >= params.sig_filter_sigma)

                bright_refls = bin_refls.select(
                    i_over_sigma >= params.sig_filter_sigma)
                n_bright = len(bright_refls)

                # Positions are in mm; report the RMSD in microns.
                rmsd_obs = (1000 * math.sqrt(
                    (bright_refls['xyzcal.mm'] -
                     bright_refls['xyzobs.mm.value']).sum_sq() / n_bright)
                    if n_bright > 0 else 0)

                table_row = [
                    "%3d" % i,
                    "%-13s" % binner.bin_legend(
                        i_bin=i, show_bin_number=False, show_bin_range=False,
                        show_d_range=True, show_counts=False),
                    "%13s" % binner.bin_legend(
                        i_bin=i, show_bin_number=False, show_bin_range=False,
                        show_d_range=False, show_counts=True),
                    "%.1f" % avg_i,
                    "%.1f" % avg_i_sigi,
                    "%3d" % n_bright,
                    "%.1f" % rmsd_obs,
                ]
                table_data.append(table_row)

            # Keep only the leading run of passing bins: once one bin fails,
            # every higher-resolution bin is rejected as well.
            n_passing = 0
            for passed in acceptable_resolution_bins:
                if not passed:
                    break
                n_passing += 1
            acceptable_resolution_bins = acceptable_resolution_bins[:n_passing]

            for passed, row in zip(acceptable_resolution_bins, table_data[2:]):
                if passed:
                    row.append("X")
            print(table_utils.format(table_data, has_header=2,
                                     justify='center', delim=" "))

            if any(acceptable_resolution_bins):
                best_index = acceptable_resolution_bins.count(True) - 1
                best_row = table_data[best_index + 2]
                d_min = binner.bin_d_range(binner.range_used()[best_index])[1]
                if len(best_limits) < params.best_count:
                    best_limits.append(d_min)
                    best_data[tag] = d_min, best_row
                elif (d_min < best_limits).count(True) > 0:
                    # Evict the current worst entry. The lookup uses its own
                    # loop variable so that `tag` keeps naming the experiment
                    # being processed, and the new row is stored under the
                    # new tag rather than under the evicted one.
                    worst_d_min = flex.max(best_limits)
                    worst_tag = None
                    for other_tag, data in best_data.items():
                        if worst_d_min == data[0]:
                            worst_tag = other_tag
                            break
                    if worst_tag is not None:
                        del best_data[worst_tag]
                        best_data[tag] = d_min, best_row
                        best_limits[flex.first_index(
                            best_limits, worst_d_min)] = d_min
                print("%s best row: %s" % (tag, " ".join(best_row)))
            else:
                print("Data didn't pass cutoff")

    if len(best_limits) > 0:
        print("*" * 80)
        print("Top %d" % len(best_limits))
        for best_tag, data in best_data.items():
            print("%s %s" % (best_tag, " ".join(data[1])))
def __call__(self):
    """Collect per-event statistics for this trial/run/rungroup.

    Two queries are made: one for indexed events (joined through to the
    per-crystal bin statistics of the lowest- and highest-resolution bins of
    each isoform) and one for events with no imageset, i.e. events that
    failed to index.

    Returns:
        A tuple of flex arrays, all sorted by timestamp:
        ``(timestamps, two_theta_low, two_theta_high, n_strong,
        average_i_sigi_low, average_i_sigi_high)``.

    Raises:
        AssertionError: If the run or rungroup does not belong to the trial,
            the trial has no isoforms, ``self.d_min`` does not fall in
            exactly one bin, or a returned bin id is neither a low- nor a
            high-resolution bin.
    """
    from iotbx.detectors.cspad_detector_formats import reverse_timestamp

    run_numbers = [r.run for r in self.trial.runs]
    assert self.run.run in run_numbers
    rungroup_ids = [rg.id for rg in self.trial.rungroups]
    assert self.rungroup.id in rungroup_ids
    isoforms = self.trial.isoforms
    assert len(isoforms) > 0

    low_res_bin_ids = []
    high_res_bin_ids = []
    for isoform in isoforms:
        bins = isoform.cell.bins
        d_mins = [float(b.d_min) for b in bins]
        low_res_bin_ids.append(str(bins[d_mins.index(max(d_mins))].id))
        if self.d_min is None:
            min_bin_index = d_mins.index(min(d_mins))
        else:
            d_maxes = [float(b.d_max) for b in bins]
            qualified_bin_indices = [
                i for i, (lo, hi) in enumerate(zip(d_mins, d_maxes))
                if hi >= self.d_min and lo <= self.d_min
            ]
            assert len(qualified_bin_indices) == 1
            min_bin_index = qualified_bin_indices[0]
        high_res_bin_ids.append(str(bins[min_bin_index].id))
    assert len(low_res_bin_ids) > 0
    assert len(high_res_bin_ids) > 0
    assert len(low_res_bin_ids) == len(high_res_bin_ids)

    tag = self.app.params.experiment_tag

    # Get the high and low res avg_i_sigi in one query. Means there will be
    # 2x timestamps retrieved, where each is found twice.
    # NOTE: the statement is built by string formatting because `tag` is a
    # table-name prefix, which cannot be passed as a bound parameter; the
    # numeric ids are formatted with %d.
    query = """SELECT bin.id, event.timestamp, event.n_strong, cb.avg_i_sigi, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               JOIN `%s_imageset` imgset ON imgset.id = is_e.imageset_id
               JOIN `%s_experiment` exp ON exp.imageset_id = imgset.id
               JOIN `%s_crystal` crystal ON crystal.id = exp.crystal_id
               JOIN `%s_cell` cell ON cell.id = crystal.cell_id
               JOIN `%s_bin` bin ON bin.cell_id = cell.id
               JOIN `%s_cell_bin` cb ON cb.bin_id = bin.id AND cb.crystal_id = crystal.id
               WHERE event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d AND
                     cb.bin_id IN (%s)
            """ % (tag, tag, tag, tag, tag, tag, tag, tag,
                   self.trial.id, self.run.id, self.rungroup.id,
                   ", ".join(low_res_bin_ids + high_res_bin_ids))
    cursor = self.app.execute_query(query)

    timestamps = flex.double()
    n_strong = flex.int()
    average_i_sigi_low = flex.double()
    average_i_sigi_high = flex.double()
    two_theta_low = flex.double()
    two_theta_high = flex.double()

    low_ids = set(low_res_bin_ids)
    high_ids = set(high_res_bin_ids)
    # timestamp -> position in the arrays above, so that the second row of
    # an event is matched without scanning `timestamps` for every row.
    position_of = {}

    for row in cursor.fetchall():
        b_id, ts, n_s, avg_i_sigi, tt_low, tt_high = row
        rts = reverse_timestamp(ts)
        rts = rts[0] + (rts[1]/1000)

        bin_id = str(b_id)
        # A bin that is both the lowest and the highest is treated as low.
        if bin_id in low_ids:
            is_low = True
        elif bin_id in high_ids:
            is_low = False
        else:
            raise AssertionError(
                "bin %s is neither a low nor a high resolution bin" % bin_id)

        index = position_of.get(rts)
        if index is None:
            # First time through: record the event and fill in whichever
            # avg_i_sigi (high or low) this row reports.
            position_of[rts] = len(timestamps)
            timestamps.append(rts)
            n_strong.append(n_s)
            two_theta_low.append(tt_low or -1)
            two_theta_high.append(tt_high or -1)
            if is_low:
                average_i_sigi_low.append(avg_i_sigi or 1e-6)
                average_i_sigi_high.append(0)
            else:
                average_i_sigi_low.append(0)
                average_i_sigi_high.append(avg_i_sigi or 0)
        else:
            # Second time through: the event is already recorded, so only
            # fill in the missing avg_i_sigi. A NULL value from the database
            # is replaced exactly as in the first-time branch.
            if is_low:
                average_i_sigi_low[index] = avg_i_sigi or 1e-6
            else:
                average_i_sigi_high[index] = avg_i_sigi or 0

    # This left join query finds the events with no imageset, meaning they
    # failed to index
    query = """SELECT event.timestamp, event.n_strong, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               LEFT JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               WHERE is_e.event_id IS NULL AND
                     event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d
            """ % (tag, tag, self.trial.id, self.run.id, self.rungroup.id)
    cursor = self.app.execute_query(query)
    for row in cursor.fetchall():
        ts, n_s, tt_low, tt_high = row
        rts = reverse_timestamp(ts)
        timestamps.append(rts[0] + (rts[1]/1000))
        n_strong.append(n_s)
        average_i_sigi_low.append(0)
        average_i_sigi_high.append(0)
        two_theta_low.append(tt_low or -1)
        two_theta_high.append(tt_high or -1)

    order = flex.sort_permutation(timestamps)
    timestamps = timestamps.select(order)
    n_strong = n_strong.select(order)
    average_i_sigi_low = average_i_sigi_low.select(order)
    average_i_sigi_high = average_i_sigi_high.select(order)
    two_theta_low = two_theta_low.select(order)
    two_theta_high = two_theta_high.select(order)

    return (timestamps, two_theta_low, two_theta_high, n_strong,
            average_i_sigi_low, average_i_sigi_high)
def get_closest_idx(data, val):
    """Return the index of the first element of ``data`` nearest to ``val``."""
    from scitbx.array_family import flex

    offsets = flex.abs(data - val)
    nearest = flex.min(offsets)
    return flex.first_index(offsets, nearest)
def run(self):
    """Execute the script.

    For every experiment found under the paths given on the command line,
    print a per-resolution-bin table (mean I, mean I/sigma(I), number of
    bright reflections, RMSD between predicted and observed positions), the
    unit cell and space group, and mark the leading bins whose mean
    I/sigma(I) passes ``params.sig_filter_sigma``. The ``params.best_count``
    entries with the smallest d_min cutoff are kept and printed at the end.
    """
    import os, math
    from cctbx.crystal import symmetry
    from scitbx.array_family import flex
    from libtbx import table_utils, easy_pickle
    from xfel.command_line.cspad_cbf_metrology import find_files
    from dxtbx.model.experiment_list import ExperimentListFactory

    table_header = ["", "", "", "I", "IsigI", "N >", "RMSD", "Cutoff"]
    table_header2 = [
        "Bin",
        "Resolution Range",
        "Completeness",
        "",
        "",
        "cutoff",
        "(um)",
        "",
    ]

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(
        show_diff_phil=False, return_unhandled=True)

    exp_paths = []
    refl_paths = []
    for path in all_paths:
        exps, refs = find_files(path, "integrated")
        exp_paths.extend(exps)
        refl_paths.extend(refs)
    assert len(exp_paths) == len(refl_paths)

    tag_prefix = "idx-"
    tag_suffix = "_refined_experiments.json"

    best_data = {}
    best_limits = flex.double()
    for exp_path, refl_path in zip(exp_paths, refl_paths):
        experiments = ExperimentListFactory.from_json_file(
            exp_path, check_format=False)
        # NOTE: unpickling executes code from the file; only use on trusted
        # reflection files.
        reflections = easy_pickle.load(refl_path)
        exp_name = os.path.basename(exp_path)
        if exp_name.startswith(tag_prefix) and exp_name.endswith(tag_suffix):
            # Slice the prefix/suffix off. str.lstrip/rstrip take a *set of
            # characters* and would also eat leading/trailing characters of
            # the tag itself (e.g. "idx-dxi_7" -> "_7").
            tag = exp_name[len(tag_prefix):-len(tag_suffix)]
        else:
            tag = "%s, %s" % (exp_path, refl_path)

        for exp_id, experiment in enumerate(experiments):
            print("*" * 80)
            print("Data table for", tag)
            table_data = [table_header, table_header2]

            crystal = experiment.crystal
            refls = reflections.select(reflections["id"] == exp_id)
            sym = symmetry(
                unit_cell=crystal.get_unit_cell(),
                space_group=crystal.get_space_group(),
            )
            d = crystal.get_unit_cell().d(refls["miller_index"])
            mset = sym.miller_set(
                indices=refls["miller_index"].select(d >= params.d_min),
                anomalous_flag=False,
            )
            binner = mset.setup_binner(n_bins=params.n_bins)

            acceptable_resolution_bins = []
            for i in binner.range_used():
                d_max, d_min = binner.bin_d_range(i)
                sel = (d <= d_max) & (d > d_min)
                sel &= refls["intensity.sum.value"] > 0
                bin_refls = refls.select(sel)
                n_refls = len(bin_refls)

                i_over_sigma = (bin_refls["intensity.sum.value"] /
                                flex.sqrt(bin_refls["intensity.sum.variance"]))
                avg_i = (flex.mean(bin_refls["intensity.sum.value"])
                         if n_refls > 0 else 0)
                avg_i_sigi = flex.mean(i_over_sigma) if n_refls > 0 else 0
                acceptable_resolution_bins.append(
                    avg_i_sigi >= params.sig_filter_sigma)

                bright_refls = bin_refls.select(
                    i_over_sigma >= params.sig_filter_sigma)
                n_bright = len(bright_refls)

                # Positions are in mm; report the RMSD in microns.
                rmsd_obs = (1000 * math.sqrt(
                    (bright_refls["xyzcal.mm"] -
                     bright_refls["xyzobs.mm.value"]).sum_sq() / n_bright)
                    if n_bright > 0 else 0)

                table_row = [
                    "%3d" % i,
                    "%-13s" % binner.bin_legend(
                        i_bin=i,
                        show_bin_number=False,
                        show_bin_range=False,
                        show_d_range=True,
                        show_counts=False,
                    ),
                    "%13s" % binner.bin_legend(
                        i_bin=i,
                        show_bin_number=False,
                        show_bin_range=False,
                        show_d_range=False,
                        show_counts=True,
                    ),
                    "%.1f" % avg_i,
                    "%.1f" % avg_i_sigi,
                    "%3d" % n_bright,
                    "%.1f" % rmsd_obs,
                ]
                table_data.append(table_row)

            # Keep only the leading run of passing bins: once one bin fails,
            # every higher-resolution bin is rejected as well.
            n_passing = 0
            for passed in acceptable_resolution_bins:
                if not passed:
                    break
                n_passing += 1
            acceptable_resolution_bins = acceptable_resolution_bins[:n_passing]

            for passed, row in zip(acceptable_resolution_bins, table_data[2:]):
                if passed:
                    row.append("X")
            print(
                table_utils.format(table_data,
                                   has_header=2,
                                   justify="center",
                                   delim=" "))
            print(
                tag,
                "unit cell:",
                ", ".join([
                    "%.2f" % p for p in crystal.get_unit_cell().parameters()
                ]),
                crystal.get_space_group().info(),
            )

            if any(acceptable_resolution_bins):
                best_index = acceptable_resolution_bins.count(True) - 1
                best_row = table_data[best_index + 2]
                d_min = binner.bin_d_range(
                    binner.range_used()[best_index])[1]
                if len(best_limits) < params.best_count:
                    best_limits.append(d_min)
                    best_data[tag] = d_min, best_row
                elif (d_min < best_limits).count(True) > 0:
                    # Evict the current worst entry and store the new row
                    # under the tag of the experiment being processed, not
                    # under the tag of the entry it replaces.
                    worst_d_min = flex.max(best_limits)
                    worst_tag = None
                    for other_tag, data in best_data.items():
                        if worst_d_min == data[0]:
                            worst_tag = other_tag
                            break
                    if worst_tag is not None:
                        del best_data[worst_tag]
                        best_data[tag] = d_min, best_row
                        best_limits[flex.first_index(
                            best_limits, worst_d_min)] = d_min
                print(tag, "best row:", " ".join(best_row))
            else:
                print("Data didn't pass cutoff")

    if len(best_limits) > 0:
        print("*" * 80)
        print("Top", len(best_limits))
        for best_tag, data in best_data.items():
            print(best_tag, " ".join(data[1]))
def __call__(self):
    """Collect per-event statistics for this trial/run/rungroup.

    Indexed events are read together with the avg_i_sigi of the lowest- and
    highest-resolution bin of each isoform; events with no imageset (failed
    to index) are read with a second query and given zero avg_i_sigi.

    Returns:
        A tuple of flex arrays, all sorted by timestamp:
        ``(timestamps, two_theta_low, two_theta_high, n_strong,
        average_i_sigi_low, average_i_sigi_high)``.

    Raises:
        AssertionError: If the run or rungroup does not belong to the trial,
            the trial has no isoforms, ``self.d_min`` does not fall in
            exactly one bin, or a returned bin id is neither a low- nor a
            high-resolution bin.
    """
    from iotbx.detectors.cspad_detector_formats import reverse_timestamp

    run_numbers = [r.run for r in self.trial.runs]
    assert self.run.run in run_numbers
    rungroup_ids = [rg.id for rg in self.trial.rungroups]
    assert self.rungroup.id in rungroup_ids
    isoforms = self.trial.isoforms
    assert len(isoforms) > 0

    low_res_bin_ids = []
    high_res_bin_ids = []
    for isoform in isoforms:
        bins = isoform.cell.bins
        d_mins = [float(b.d_min) for b in bins]
        low_res_bin_ids.append(str(bins[d_mins.index(max(d_mins))].id))
        if self.d_min is None:
            min_bin_index = d_mins.index(min(d_mins))
        else:
            d_maxes = [float(b.d_max) for b in bins]
            qualified_bin_indices = [
                i for i, (lo, hi) in enumerate(zip(d_mins, d_maxes))
                if hi >= self.d_min and lo <= self.d_min
            ]
            assert len(qualified_bin_indices) == 1
            min_bin_index = qualified_bin_indices[0]
        high_res_bin_ids.append(str(bins[min_bin_index].id))
    assert len(low_res_bin_ids) > 0
    assert len(high_res_bin_ids) > 0
    assert len(low_res_bin_ids) == len(high_res_bin_ids)

    tag = self.app.params.experiment_tag

    # Get the high and low res avg_i_sigi in one query. Means there will be
    # 2x timestamps retrieved, where each is found twice.
    # NOTE: `tag` is a table-name prefix and cannot be a bound parameter,
    # hence the string formatting; numeric ids are formatted with %d.
    query = """SELECT bin.id, event.timestamp, event.n_strong, cb.avg_i_sigi, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               JOIN `%s_imageset` imgset ON imgset.id = is_e.imageset_id
               JOIN `%s_experiment` exp ON exp.imageset_id = imgset.id
               JOIN `%s_crystal` crystal ON crystal.id = exp.crystal_id
               JOIN `%s_cell` cell ON cell.id = crystal.cell_id
               JOIN `%s_bin` bin ON bin.cell_id = cell.id
               JOIN `%s_cell_bin` cb ON cb.bin_id = bin.id AND cb.crystal_id = crystal.id
               WHERE event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d AND
                     cb.bin_id IN (%s)
            """ % (tag, tag, tag, tag, tag, tag, tag, tag,
                   self.trial.id, self.run.id, self.rungroup.id,
                   ", ".join(low_res_bin_ids + high_res_bin_ids))
    cursor = self.app.execute_query(query)

    timestamps = flex.double()
    n_strong = flex.int()
    average_i_sigi_low = flex.double()
    average_i_sigi_high = flex.double()
    two_theta_low = flex.double()
    two_theta_high = flex.double()

    low_ids = set(low_res_bin_ids)
    high_ids = set(high_res_bin_ids)
    # Maps a timestamp to its slot in the arrays above; replaces a linear
    # scan of `timestamps` for every returned row.
    slot_of = {}

    for b_id, ts, n_s, avg_i_sigi, tt_low, tt_high in cursor.fetchall():
        rts = reverse_timestamp(ts)
        rts = rts[0] + (rts[1] / 1000)

        bin_id = str(b_id)
        # Low takes precedence when one bin is both lowest and highest.
        if bin_id in low_ids:
            is_low = True
        elif bin_id in high_ids:
            is_low = False
        else:
            raise AssertionError(
                "bin %s is neither a low nor a high resolution bin" % bin_id)

        slot = slot_of.get(rts)
        if slot is None:
            # First row for this event: record it and fill in whichever
            # avg_i_sigi (high or low) the row reports.
            slot_of[rts] = len(timestamps)
            timestamps.append(rts)
            n_strong.append(n_s)
            two_theta_low.append(tt_low or -1)
            two_theta_high.append(tt_high or -1)
            if is_low:
                average_i_sigi_low.append(avg_i_sigi or 1e-6)
                average_i_sigi_high.append(0)
            else:
                average_i_sigi_low.append(0)
                average_i_sigi_high.append(avg_i_sigi or 0)
        elif is_low:
            # Second row for this event: only the missing avg_i_sigi is
            # filled in. NULL is replaced as in the first-row branch.
            average_i_sigi_low[slot] = avg_i_sigi or 1e-6
        else:
            average_i_sigi_high[slot] = avg_i_sigi or 0

    # This left join query finds the events with no imageset, meaning they
    # failed to index
    query = """SELECT event.timestamp, event.n_strong, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               LEFT JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               WHERE is_e.event_id IS NULL AND
                     event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d
            """ % (tag, tag, self.trial.id, self.run.id, self.rungroup.id)
    cursor = self.app.execute_query(query)
    for ts, n_s, tt_low, tt_high in cursor.fetchall():
        rts = reverse_timestamp(ts)
        timestamps.append(rts[0] + (rts[1] / 1000))
        n_strong.append(n_s)
        average_i_sigi_low.append(0)
        average_i_sigi_high.append(0)
        two_theta_low.append(tt_low or -1)
        two_theta_high.append(tt_high or -1)

    order = flex.sort_permutation(timestamps)
    timestamps = timestamps.select(order)
    n_strong = n_strong.select(order)
    average_i_sigi_low = average_i_sigi_low.select(order)
    average_i_sigi_high = average_i_sigi_high.select(order)
    two_theta_low = two_theta_low.select(order)
    two_theta_high = two_theta_high.select(order)

    return (timestamps, two_theta_low, two_theta_high, n_strong,
            average_i_sigi_low, average_i_sigi_high)
def imgCIF_H(cbf_handle):
    """Initialize a goniometer model from an imgCIF file handle, where it is
    assumed that the file has already been read.

    Reads the goniometer rows of the "axis" category and the matching rows
    of the "diffrn_scan_axis" category, then builds a multi-axis goniometer
    with the axes in reverse dependency order.

    Raises:
        AssertionError: If the number of start angles read differs from the
            number of goniometer axes, if no axis or more than one axis has
            a non-zero angle_increment, or if two axes resolve to the same
            position in the dependency order.
    """
    from scitbx.array_family import flex

    axis_names = flex.std_string()
    depends_on = flex.std_string()
    axes = flex.vec3_double()
    angles = flex.double()
    scan_axis = None

    # find the goniometer axes and dependencies
    cbf_handle.find_category("axis")
    for _ in range(cbf_handle.count_rows()):
        cbf_handle.find_column("equipment")
        if cbf_handle.get_value() == "goniometer":
            cbf_handle.find_column("id")
            axis_names.append(cbf_handle.get_value())
            axis = []
            for component in (1, 2, 3):
                cbf_handle.find_column("vector[%i]" % component)
                axis.append(float(cbf_handle.get_value()))
            axes.append(axis)
            cbf_handle.find_column("depends_on")
            depends_on.append(cbf_handle.get_value())
        cbf_handle.next_row()

    # find the starting angles of each goniometer axis and figure out which
    # one is the scan axis (i.e. non-zero angle_increment)
    cbf_handle.find_category("diffrn_scan_axis")
    for _ in range(cbf_handle.count_rows()):
        cbf_handle.find_column("axis_id")
        axis_name = cbf_handle.get_value()
        if axis_name in axis_names:
            cbf_handle.find_column("angle_start")
            axis_angle = float(cbf_handle.get_value())
            cbf_handle.find_column("angle_increment")
            increment = float(cbf_handle.get_value())
            angles.append(axis_angle)
            if abs(increment) > 0:
                assert scan_axis is None, \
                    "More than one scan axis is defined: not currently supported"
                scan_axis = flex.first_index(axis_names, axis_name)
        # Advance for every row, including rows of non-goniometer axes;
        # otherwise the handle stays on the skipped row and the remaining
        # iterations re-read it instead of the following rows.
        cbf_handle.next_row()

    assert axes.size() == angles.size()
    assert scan_axis is not None

    # figure out the order of the axes from the depends_on values: an axis
    # that depends on '.' goes first, otherwise it follows its parent
    order = flex.size_t()
    for i in range(axes.size()):
        if depends_on[i] == '.':
            o = 0
        else:
            o = flex.first_index(axis_names, depends_on[i]) + 1
        assert o not in order
        order.append(o)

    # multi-axis gonio requires axes in order as viewed from crystal to gonio
    # base i.e. the reverse of the order we have from cbf header
    order = order.reversed()
    axes = axes.select(order)
    angles = angles.select(order)
    axis_names = axis_names.select(order)
    scan_axis = axes.size() - scan_axis - 1

    # construct a multi-axis goniometer
    gonio = goniometer_factory.multi_axis(axes, angles, axis_names, scan_axis)
    return gonio
def find_relatives(ids, cc_min, cc_max, rmax, codes, moments, nmax=10):
    """Average the relative intensity differences between reference models
    and their "relatives", and write the result to ``q_m_s_<rmax>.dat``.

    Reference entries are the codes listed in ``id_list.txt`` (the first four
    characters of each line).  For each reference, every entry of ``moments``
    is aligned against it, and entries whose correlation coefficient lies in
    ``[cc_min, cc_max]`` are treated as relatives.  Intensity profiles are
    written through ``put_intensity``.

    :param ids: unused; the reference codes are read from ``id_list.txt``.
    :param cc_min: lower bound (inclusive) on the alignment cc of a relative.
    :param cc_max: upper bound (inclusive) on the alignment cc of a relative.
    :param rmax: radius passed to ``zernike_model`` for the reference; the
        radius of each relative is optimised within ``[0.8, 1.2] * rmax``.
    :param codes: flex string array of entry codes, indexable in step with
        ``moments``.
    :param moments: per-entry coefficient arrays.
    :param nmax: maximum expansion order passed to the nl/nlm arrays.
    :returns: None.  Output goes to stdout and to files.
    """
    indices = flex.int()
    # Close the id list promptly instead of leaking the handle.
    with open('id_list.txt', 'r') as idlist:
        for line in idlist:
            indices.append(flex.first_index(codes, line[0:4]))

    # NOTE(review): r_max is loaded but never used below; the caller-supplied
    # rmax is used for every reference.  The load is kept so a missing file
    # still fails here as before.
    r_max = easy_pickle.load(prefix + 'pisa.rmax')
    nns = easy_pickle.load(prefix + 'pisa.nn')
    nn_array = math.nl_array(nmax)
    nn_indx = nn_array.nl()
    nn_total = nn_indx.size()
    q_array = flex.double(range(501)) / 2000.0

    ref_nlm_array = math.nlm_array(nmax)
    target_nlm_array = math.nlm_array(nmax)
    nlm = ref_nlm_array.nlm()
    coef_size = nlm.size()
    all_indices = range(codes.size())

    small_q_array = flex.double(range(51)) / 300.0
    mean = []
    sig = []
    for indx in indices:
        print(indx)
        ref_coef = moments[indx]
        ref_nlm_array.load_coefs(nlm, ref_coef[0:coef_size])
        z_model = zernike_model(ref_nlm_array, q_array, rmax, nmax)
        out_name = codes[indx] + "_.qi"
        nn_array.load_coefs(nn_indx, nns[indx][0:nn_total])
        ref_int = put_intensity(z_model, q_array, nn_array, out_name)
        mean_r = ref_int * 0.0
        sig_r = ref_int * 0.0
        small_z_model = zernike_model(ref_nlm_array, small_q_array, rmax, nmax)
        small_ref_int = small_z_model.calc_intensity(nn_array)
        small_ref_int = small_ref_int / small_ref_int[0]
        N = 0.0
        for coef, ii in zip(moments, all_indices):
            # Stop once more than 25 relatives have been accumulated.
            if N > 25:
                break
            target_nlm_array.load_coefs(nlm, coef[0:coef_size])
            align_obj = fft_align.align(ref_nlm_array, target_nlm_array,
                                        nmax=nmax, topn=10, refine=False)
            cc = align_obj.get_cc()
            if cc_min <= cc <= cc_max:
                N += 1
                nn_array.load_coefs(nn_indx, nns[ii][0:nn_total])
                opt_r_obj = optimize_r(nn_array, small_ref_int,
                                       small_q_array, nmax)
                opt_r = gss(opt_r_obj.target, rmax * 0.8, rmax * 1.2)
                z_model = zernike_model(ref_nlm_array, q_array, opt_r, nmax)
                out_name = codes[indx] + "_" + codes[ii] + ".qi.rel"
                # presumably the intensity relative to ref_int (1.0 is
                # subtracted below) -- TODO confirm against put_intensity
                mod_int = put_intensity(z_model, q_array, nn_array,
                                        out_name, ref_int)
                out_name = codes[indx] + "_" + codes[ii] + ".qi"
                put_intensity(z_model, q_array, nn_array, out_name)
                mod_int = mod_int - 1.0
                mean_r += mod_int
                sig_r += mod_int * mod_int
                # Single %-formatted string: same output as the Python 2
                # print statement, and valid on Python 3.
                print("%s %s %s %s" % (ii, cc, codes[ii], opt_r))
        # Only references with more than three relatives contribute.
        if N > 3:
            mean_r /= N
            sig_r = sig_r / N - mean_r * mean_r
            mean.append(mean_r)
            sig.append(sig_r)

    N = len(mean)
    if N > 0:
        mean_r = mean[0] * 0.0
        s_r = mean[0] * 0.0
        for uu in range(N):
            mean_r += mean[uu]
            s_r += sig[uu]
        mean_r /= N
        s_r /= N
        s_r = flex.sqrt(s_r)
        # Context manager so the table is flushed and closed on exit.
        with open('q_m_s_%s.dat' % rmax, 'w') as f:
            for q, m, s in zip(q_array, mean_r, s_r):
                f.write("%s %s %s\n" % (q, m, s))
def cluster_analysis(self):
    """Run the configured clustering and derive per-dataset space groups and
    reindexing operators from the resulting cluster labels.

    The clustering method named by ``self.params.cluster.method`` is run
    first; it is expected to populate ``self.cluster_labels``, where the label
    for (sym_op_id, dataset_id) is stored at index
    ``len(self.datasets) * sym_op_id + dataset_id`` and negative labels are
    skipped.

    On return:

    * ``self.space_groups[dataset_id]`` is a copy of
      ``self.input_space_group`` expanded by every operator relating a
      symmetry operation to the first clustered one of that dataset, when
      both fall in the same cluster.
    * ``self.reindexing_ops[dataset_id][i_cluster]`` is the xyz string of the
      first element of the coset partition containing a symmetry operation
      that places the dataset in ``i_cluster``.
    """
    from cctbx.sgtbx import cosets

    if self.params.cluster.method == 'dbscan':
        self.dbscan_clustering()
    elif self.params.cluster.method == 'bisect':
        self.bisect_clustering()
    elif self.params.cluster.method == 'minimize_divide':
        self.minimize_divide_clustering()
    elif self.params.cluster.method == 'agglomerative':
        self.agglomerative_clustering()
    elif self.params.cluster.method == 'seed':
        self.seed_clustering()

    # Number of clusters in labels, ignoring noise if present.
    n_clusters = len(set(self.cluster_labels)) - (
        1 if -1 in self.cluster_labels else 0)

    n_datasets = len(self.datasets)
    sym_ops = [
        sgtbx.rt_mx(s).new_denominators(1, 12)
        for s in self.target.get_sym_ops()
    ]

    reindexing_ops = {}
    space_groups = {}

    for dataset_id in range(n_datasets):
        sg = copy.deepcopy(self.input_space_group)
        ref_sym_op_id = None
        ref_cluster_id = None
        for sym_op_id in range(len(sym_ops)):
            i_cluster = self.cluster_labels[n_datasets * sym_op_id + dataset_id]
            if i_cluster < 0:
                continue
            if ref_sym_op_id is None:
                # The first clustered operation becomes the reference.
                ref_sym_op_id = sym_op_id
                ref_cluster_id = i_cluster
                continue
            op = sym_ops[ref_sym_op_id].inverse().multiply(sym_ops[sym_op_id])
            if i_cluster == ref_cluster_id:
                sg.expand_smx(op.new_denominators(1, 12))

        sg.make_tidy()
        space_groups[dataset_id] = sg

        coset = cosets.left_decomposition(
            self.target._lattice_group,
            sg.info().primitive_setting().group())

        reindexing_ops[dataset_id] = {}

        for i_cluster in range(n_clusters):
            isel = (self.cluster_labels == i_cluster).iselection()
            dataset_ids = isel % n_datasets
            # Every position in this cluster that belongs to the current
            # dataset.  An empty selection simply skips the loop, so no
            # separate "is it present" lookup is needed.
            sel = (dataset_ids == dataset_id).iselection()
            for s in sel:
                sym_op_id = isel[s] // n_datasets
                for partition in coset.partitions:
                    if sym_ops[sym_op_id] in partition:
                        # Keep the first representative found per cluster.
                        if i_cluster not in reindexing_ops[dataset_id]:
                            reindexing_ops[dataset_id][i_cluster] = \
                                partition[0].as_xyz()

    self.space_groups = space_groups
    self.reindexing_ops = reindexing_ops
def run(args, imageset=None):
    """Compute, report and optionally plot radial averages of images.

    :param args: either a sequence of command-line strings (``name=value``
        phil assignments, or bare file paths treated as ``file_path=...``), or
        an already-extracted params object (anything ``len()`` fails on).
    :param imageset: optional imageset to process instead of loading the files
        named by ``params.file_path``.
    :returns: ``(xvals, results)`` for the last image processed, after any
        ``plot_x_max`` filtering, or ``(None, None)`` if no image was
        processed.
    :raises Sorry: for an unparseable argument or an unsupported
        ``params.x_axis``.
    :raises Usage: when no imageset is given and ``file_path`` is missing,
        empty, or names a file that does not exist.
    """
    # Parse input
    try:
        len(args)
    except Exception:
        # Not a sequence: treat it as an already-extracted params object.
        params = args
    else:
        user_phil = []
        for arg in args:
            if "=" in arg:
                try:
                    user_phil.append(libtbx.phil.parse(arg))
                except RuntimeError as e:
                    raise Sorry("Unrecognized argument '%s' (error: %s)" %
                                (arg, str(e)))
            else:
                try:
                    user_phil.append(
                        libtbx.phil.parse("""file_path=%s""" % arg))
                except ValueError:
                    raise Sorry("Unrecognized argument '%s'" % arg)
        params = master_phil.fetch(sources=user_phil).extract()

    if imageset is None:
        if (params.file_path is None or len(params.file_path) == 0
                or not all(os.path.isfile(f) for f in params.file_path)):
            master_phil.show()
            raise Usage(
                "file_path must be defined (either file_path=XXX, or the path alone)."
            )
    assert params.n_bins is not None
    assert params.verbose is not None
    assert params.output_bins is not None

    # Allow writing to a file instead of stdout
    owns_logger = params.output_file is not None
    if owns_logger:
        logger = open(params.output_file, "w")
    else:
        logger = sys.stdout

    # Defined up front so an empty image list returns (None, None) instead of
    # failing with a NameError at the return statement.
    xvals = None
    results = None

    try:
        if owns_logger:
            logger.write("%s " % params.output_file)

        if params.show_plots:
            from matplotlib import pyplot as plt

            colormap = plt.cm.gist_ncar
            plt.gca().set_color_cycle(
                [colormap(i)
                 for i in np.linspace(0, 0.9, len(params.file_path))])

        if params.mask is not None:
            params.mask = easy_pickle.load(params.mask)

        if imageset is None:
            iterable = params.file_path

            def load_func(x):
                # Try the path as image file(s), then as a datablock json,
                # then as an experiment list json.
                try:
                    obj = dxtbx.datablock.DataBlockFactory.from_filenames(
                        [x])[0].extract_imagesets()[0]
                except IndexError:
                    try:
                        obj = dxtbx.datablock.DataBlockFactory.from_json_file(
                            x)[0].extract_imagesets()[0]
                    except dxtbx.datablock.InvalidDataBlockError:
                        obj = ExperimentListFactory.from_json_file(
                            x)[0].imageset
                return obj
        else:
            iterable = [imageset]

            def load_func(x):
                return x

        # Iterate over each file provided
        for item in iterable:
            iset = load_func(item)
            n_images = len(iset)
            if params.image_number is None:
                if params.max_images is None:
                    subiterable = range(n_images)
                else:
                    subiterable = range(0, min(params.max_images, n_images))
            else:
                subiterable = [params.image_number]
            for image_number in subiterable:
                beam = iset.get_beam(image_number)
                detector = iset.get_detector(image_number)
                s0 = col(beam.get_s0())

                # Search the detector for the panel farthest from the beam.
                # The number of bins in the radial average will be equal to
                # the farthest point from the beam on the detector, in pixels,
                # unless overridden at the command line
                panel_res = [
                    p.get_max_resolution_at_corners(s0) for p in detector
                ]
                farthest_panel = detector[panel_res.index(min(panel_res))]
                size2, size1 = farthest_panel.get_image_size()
                corners = [(0, 0), (size1 - 1, 0), (0, size2 - 1),
                           (size1 - 1, size2 - 1)]
                corners_lab = [
                    col(farthest_panel.get_pixel_lab_coord(c))
                    for c in corners
                ]
                corner_two_thetas = [
                    farthest_panel.get_two_theta_at_pixel(s0, c)
                    for c in corners
                ]
                extent_two_theta = max(corner_two_thetas)
                max_corner = corners_lab[corner_two_thetas.index(
                    extent_two_theta)]
                extent = int(
                    math.ceil(max_corner.length()
                              * math.sin(extent_two_theta)
                              / max(farthest_panel.get_pixel_size())))
                # From here on extent_two_theta is in degrees.
                extent_two_theta *= 180 / math.pi

                if params.n_bins < extent:
                    params.n_bins = extent

                # These arrays will store the radial average info
                sums = flex.double(params.n_bins) * 0
                sums_sq = flex.double(params.n_bins) * 0
                counts = flex.int(params.n_bins) * 0

                all_data = iset[image_number]

                if not isinstance(all_data, tuple):
                    all_data = (all_data, )

                for tile, (panel, data) in enumerate(zip(detector, all_data)):
                    if params.panel is not None and tile != params.panel:
                        continue

                    if params.mask is None:
                        mask = flex.bool(flex.grid(data.focus()), True)
                    else:
                        mask = params.mask[tile]

                    if hasattr(data, "as_double"):
                        data = data.as_double()

                    logger.flush()
                    if params.verbose:
                        logger.write("Average intensity tile %d: %9.3f\n" %
                                     (tile, flex.mean(data)))
                        logger.write("N bins: %d\n" % params.n_bins)
                        logger.flush()

                    x1, y1, x2, y2 = (
                        0,
                        0,
                        panel.get_image_size()[1],
                        panel.get_image_size()[0],
                    )
                    bc = panel.get_beam_centre_px(beam.get_s0())
                    bc = int(round(bc[1])), int(round(bc[0]))

                    # compute the average
                    radial_average(
                        data,
                        mask,
                        bc,
                        sums,
                        sums_sq,
                        counts,
                        panel.get_pixel_size()[0],
                        panel.get_distance(),
                        (x1, y1),
                        (x2, y2),
                    )

                # average the results, avoiding division by zero
                results = sums.set_selected(counts <= 0, 0)
                results /= counts.set_selected(counts <= 0, 1).as_double()

                if params.median_filter_size is not None:
                    logger.write(
                        "WARNING, the median filter is not fully propagated to the variances\n"
                    )
                    from scipy.ndimage.filters import median_filter

                    results = flex.double(
                        median_filter(results.as_numpy_array(),
                                      size=params.median_filter_size))

                # calculate standard devations
                stddev_sel = ((sums_sq - sums * results) >= 0) & (counts > 0)
                std_devs = flex.double(len(sums), 0)
                std_devs.set_selected(
                    stddev_sel,
                    (sums_sq.select(stddev_sel)
                     - sums.select(stddev_sel) * results.select(stddev_sel))
                    / counts.select(stddev_sel).as_double(),
                )
                std_devs = flex.sqrt(std_devs)

                twotheta = (flex.double(range(len(results)))
                            * extent_two_theta / params.n_bins)
                q_vals = (4 * math.pi * flex.sin(math.pi * twotheta / 360)
                          / beam.get_wavelength())
                # Bragg's law, n*lambda = 2*d*sin(theta), so
                # d = lambda / (2*sin(theta)) with theta = twotheta/2 in
                # radians.  Bins with twotheta == 0 keep a resolution of 0.
                resolution = flex.double(len(twotheta), 0)
                nonzero = twotheta > 0
                resolution.set_selected(
                    nonzero,
                    beam.get_wavelength() / (2 * flex.sin(
                        (math.pi / 180) * twotheta.select(nonzero) / 2)),
                )

                if params.low_max_two_theta_limit is None:
                    subset = results
                else:
                    subset = results.select(
                        twotheta >= params.low_max_two_theta_limit)

                max_result = flex.max(subset)

                if params.x_axis == "two_theta":
                    xvals = twotheta
                elif params.x_axis == "q":
                    xvals = q_vals
                elif params.x_axis == "resolution":
                    xvals = resolution
                else:
                    raise Sorry("Unsupported x_axis '%s'" % params.x_axis)
                max_x = xvals[flex.first_index(results, max_result)]

                for i, r in enumerate(results):
                    val = xvals[i]
                    if params.output_bins and "%.3f" % r != "nan":
                        # .xye format for GSASII
                        logger.write("%9.3f %9.3f     %9.3f\n" %
                                     (val, r, std_devs[i]))
                logger.write("Maximum %s: %f, value: %f\n" %
                             (params.x_axis, max_x, max_result))

                if params.show_plots:
                    if params.plot_x_max is not None:
                        results = results.select(xvals <= params.plot_x_max)
                        xvals = xvals.select(xvals <= params.plot_x_max)
                    if params.normalize:
                        plt.plot(
                            xvals.as_numpy_array(),
                            (results / flex.max(results)).as_numpy_array(),
                            "-",
                        )
                    else:
                        plt.plot(xvals.as_numpy_array(),
                                 results.as_numpy_array(), "-")
                    if params.x_axis == "two_theta":
                        plt.xlabel("2 theta")
                    elif params.x_axis == "q":
                        plt.xlabel("q")
                    elif params.x_axis == "resolution":
                        plt.xlabel("Resolution ($\\AA$)")
                        plt.gca().set_xscale("log")
                        plt.gca().invert_xaxis()
                        plt.xlim(0, 50)
                    plt.ylabel("Avg ADUs")
                    if params.plot_y_max is not None:
                        plt.ylim(0, params.plot_y_max)

        if params.show_plots:
            plt.show()
    finally:
        # Close the report only if it was opened here; never close stdout.
        if owns_logger:
            logger.close()

    return xvals, results
def seed_clustering(self):
    """Cluster the coordinates by growing seeded clusters, then merge them
    hierarchically and pick the merge level by silhouette score.

    Stage 1: while unlabelled points remain, the point with the smallest mean
    cosine distance to its nearest neighbours seeds a cluster.  The cluster
    is then grown with the nearest point (in cosine distance to the running
    centroid) that does not share a dataset with the most recently added
    member, ``len(self.datasets) - 1`` times.

    Stage 2: cluster centroids are linked with average-linkage hierarchical
    clustering on cosine distance.  Each linkage distance is tried as a cut
    threshold; only cuts giving more than one cluster, all of equal size, are
    scored.

    Stage 3: the cut with the best silhouette score is used when
    ``params.cluster.seed.n_clusters`` is Auto, otherwise the first cut with
    the requested number of clusters.  With Auto, a best score below
    ``params.cluster.seed.min_silhouette_score`` collapses everything into a
    single cluster.

    :returns: ``self.cluster_labels`` (also stored on the instance).
    :raises Sorry: if a specific ``n_clusters`` was requested and no valid cut
        produces it.
    """
    eps = 1e-6
    X_orig = self.coords.as_numpy_array()

    import numpy as np
    from scipy.cluster import hierarchy
    import scipy.spatial.distance as ssd
    from sklearn.neighbors import NearestNeighbors
    from sklearn import metrics

    # initialise cluster labels: -1 signifies doesn't belong to a cluster
    self.cluster_labels = flex.int(self.coords.all()[0], -1)

    # Dataset id and coordinate index of every point.  These do not change
    # between iterations, so they are built once.
    all_dataset_ids = (flex.int_range(
        len(self.datasets) * len(self.target.get_sym_ops()))
        % len(self.datasets)).as_numpy_array()
    all_coord_ids = flex.int_range(all_dataset_ids.size).as_numpy_array()

    cluster_id = 0
    while self.cluster_labels.count(-1) > 0:
        # select only those points that don't already belong to a cluster
        sel = np.where(self.cluster_labels == -1)
        X = X_orig[sel]
        dataset_ids = all_dataset_ids[sel]
        coord_ids = all_coord_ids[sel]

        # choose a high density point as seed for cluster
        nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                                algorithm='brute',
                                metric='cosine').fit(X)
        distances, indices = nbrs.kneighbors(X)
        # dist[0] is skipped (presumably the point itself -- TODO confirm)
        average_distance = flex.double(
            [dist[1:].mean() for dist in distances])
        i = flex.min_index(average_distance)

        d_id = dataset_ids[i]
        cluster = np.array([coord_ids[i]])
        xis = np.array([X[i]])

        for _ in range(len(self.datasets) - 1):
            # drop the rows belonging to the dataset that was added last
            sel = np.where(dataset_ids != d_id)
            X = X[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            assert len(X) > 0

            # Find nearest neighbour in cosine-space to the current cluster
            # centroid
            nbrs = NearestNeighbors(n_neighbors=min(1, len(X)),
                                    algorithm='brute',
                                    metric='cosine').fit(X)
            distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
            k = indices[0][0]
            d_id = dataset_ids[k]
            cluster = np.append(cluster, coord_ids[k])
            xis = np.append(xis, [X[k]], axis=0)

        # label this cluster
        self.cluster_labels.set_selected(flex.size_t(cluster.tolist()),
                                         cluster_id)
        cluster_id += 1

    if flex.max(self.cluster_labels) == 0:
        # assume single cluster
        return self.cluster_labels

    cluster_centroids = []
    X = self.coords.as_numpy_array()
    for i in set(self.cluster_labels):
        members = (self.cluster_labels == i).iselection().as_numpy_array()
        cluster_centroids.append(X[members].mean(axis=0))

    # hierarchical clustering of cluster centroids, using cosine metric
    dist_mat = ssd.pdist(cluster_centroids, metric='cosine')
    linkage_matrix = hierarchy.linkage(dist_mat, method='average')

    # compare valid equal-sized clustering using silhouette scores
    # https://en.wikipedia.org/wiki/Silhouette_(clustering)
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
    distances = linkage_matrix[::, 2]
    distances = np.insert(distances, 0, 0)
    silhouette_scores = flex.double()
    thresholds = flex.double()
    n_clusters = flex.size_t()
    for threshold in distances[1:]:
        cluster_labels = self.cluster_labels.deep_copy()
        labels = hierarchy.fcluster(linkage_matrix,
                                    threshold - eps,
                                    criterion='distance').tolist()
        counts = [labels.count(l) for l in set(labels)]
        if len(set(counts)) > 1:
            # only equal-sized clusters are valid
            continue

        n = len(set(labels))
        if n == 1:
            continue

        # Map each seed cluster onto its merged cluster (fcluster labels
        # start at 1, ours at 0).
        for i, label in enumerate(labels):
            cluster_labels.set_selected(self.cluster_labels == i,
                                        int(label - 1))

        # Per-sample silhouette values; their mean is the overall silhouette
        # score, so it is not computed separately.
        sample_silhouette_values = metrics.silhouette_samples(
            X, cluster_labels.as_numpy_array(), metric='cosine')
        silhouette_avg = sample_silhouette_values.mean()
        silhouette_scores.append(silhouette_avg)
        thresholds.append(threshold)
        n_clusters.append(n)

        count_negative = (sample_silhouette_values < 0).sum()
        logger.info('Clustering:')
        logger.info('  Number of clusters: %i' % n)
        logger.info('  Threshold score: %.3f (%.1f deg)' %
                    (threshold, math.degrees(math.acos(1 - threshold))))
        logger.info('  Silhouette score: %.3f' % silhouette_avg)
        logger.info('  -ve silhouette scores: %.1f%%' %
                    (100 * count_negative / sample_silhouette_values.size))

        if self.params.save_plot:
            plot_silhouette(sample_silhouette_values,
                            cluster_labels.as_numpy_array(),
                            file_name='%ssilhouette_%i.png' %
                            (self.params.plot_prefix, n))

    if self.params.cluster.seed.n_clusters is Auto:
        idx = flex.max_index(silhouette_scores)
    else:
        idx = flex.first_index(n_clusters, self.params.cluster.seed.n_clusters)
        if idx is None:
            raise Sorry('No valid clustering with %i clusters' %
                        self.params.cluster.seed.n_clusters)

    if (self.params.cluster.seed.n_clusters is Auto
            and silhouette_scores[idx]
            < self.params.cluster.seed.min_silhouette_score):
        # assume single cluster
        self.cluster_labels = flex.int(self.cluster_labels.size(), 0)
    else:
        threshold = thresholds[idx] - eps
        labels = hierarchy.fcluster(linkage_matrix,
                                    threshold,
                                    criterion='distance')
        cluster_labels = flex.double(self.cluster_labels.size(), -1)
        for i, label in enumerate(labels):
            cluster_labels.set_selected(self.cluster_labels == i, label - 1)
        self.cluster_labels = cluster_labels

    # NOTE(review): in the single-cluster branch above, `threshold` is
    # whatever the scoring loop left behind -- confirm that is intended.
    if self.params.save_plot:
        plot_matrix(1 - ssd.squareform(dist_mat),
                    linkage_matrix,
                    '%sseed_clustering_cos_angle_matrix.png' %
                    self.params.plot_prefix,
                    color_threshold=threshold)
        plot_dendrogram(linkage_matrix,
                        '%sseed_clustering_cos_angle_dendrogram.png' %
                        self.params.plot_prefix,
                        color_threshold=threshold)

    return self.cluster_labels
def run(args, image=None):
    """Compute, report and optionally plot radial averages of images.

    :param args: either a sequence of command-line strings (``name=value``
        phil assignments, or bare file paths treated as ``file_path=...``), or
        an already-extracted params object (anything ``len()`` fails on).
    :param image: optional already-loaded image object to process instead of
        loading the files named by ``params.file_path``.
    :returns: ``(xvals, results)`` for the last image processed, after any
        ``plot_x_max`` filtering, or ``(None, None)`` if no image was
        processed.
    :raises Sorry: for an unparseable argument or an unsupported
        ``params.x_axis``.
    :raises Usage: when no image is given and ``file_path`` is missing, empty,
        or names a file that does not exist.
    """
    from xfel import radial_average
    from scitbx.array_family import flex
    import os
    import sys
    import dxtbx

    # Parse input
    try:
        len(args)
    except Exception:
        # Not a sequence: treat it as an already-extracted params object.
        params = args
    else:
        user_phil = []
        for arg in args:
            if "=" not in arg:
                try:
                    user_phil.append(
                        libtbx.phil.parse("""file_path=%s""" % arg))
                except ValueError:
                    raise Sorry("Unrecognized argument '%s'" % arg)
            else:
                try:
                    user_phil.append(libtbx.phil.parse(arg))
                except RuntimeError as e:
                    raise Sorry("Unrecognized argument '%s' (error: %s)" %
                                (arg, str(e)))
        params = master_phil.fetch(sources=user_phil).extract()

    if image is None:
        if (params.file_path is None or len(params.file_path) == 0
                or not all(os.path.isfile(f) for f in params.file_path)):
            master_phil.show()
            raise Usage(
                "file_path must be defined (either file_path=XXX, or the path alone).")
    assert params.n_bins is not None
    assert params.verbose is not None
    assert params.output_bins is not None

    # Allow writing to a file instead of stdout
    owns_logger = params.output_file is not None
    if owns_logger:
        logger = open(params.output_file, 'w')
    else:
        logger = sys.stdout

    # Defined up front so an empty file list returns (None, None) instead of
    # failing with a NameError at the return statement.
    xvals = None
    results = None

    try:
        if owns_logger:
            logger.write("%s " % params.output_file)

        if params.show_plots:
            from matplotlib import pyplot as plt
            import numpy as np
            colormap = plt.cm.gist_ncar
            plt.gca().set_color_cycle(
                [colormap(i)
                 for i in np.linspace(0, 0.9, len(params.file_path))])

        if params.mask is not None:
            params.mask = easy_pickle.load(params.mask)

        if image is None:
            iterable = params.file_path
            load_func = dxtbx.load
        else:
            iterable = [image]

            def load_func(x):
                return x

        # Iterate over each file provided
        for item in iterable:
            img = load_func(item)
            try:
                n_images = img.get_num_images()
                subiterable = range(n_images)
            except AttributeError:
                # Single-image object: no per-image index is passed below.
                n_images = None
                subiterable = [0]
            for image_number in subiterable:
                if n_images is None:
                    beam = img.get_beam()
                    detector = img.get_detector()
                else:
                    beam = img.get_beam(image_number)
                    detector = img.get_detector(image_number)
                s0 = col(beam.get_s0())

                # Search the detector for the panel farthest from the beam.
                # The number of bins in the radial average will be equal to
                # the farthest point from the beam on the detector, in pixels,
                # unless overridden at the command line
                panel_res = [
                    p.get_max_resolution_at_corners(s0) for p in detector
                ]
                farthest_panel = detector[panel_res.index(min(panel_res))]
                size2, size1 = farthest_panel.get_image_size()
                corners = [(0, 0), (size1 - 1, 0), (0, size2 - 1),
                           (size1 - 1, size2 - 1)]
                corners_lab = [
                    col(farthest_panel.get_pixel_lab_coord(c))
                    for c in corners
                ]
                corner_two_thetas = [
                    farthest_panel.get_two_theta_at_pixel(s0, c)
                    for c in corners
                ]
                extent_two_theta = max(corner_two_thetas)
                max_corner = corners_lab[corner_two_thetas.index(
                    extent_two_theta)]
                extent = int(
                    math.ceil(max_corner.length()
                              * math.sin(extent_two_theta)
                              / max(farthest_panel.get_pixel_size())))
                # From here on extent_two_theta is in degrees.
                extent_two_theta *= 180 / math.pi

                if params.n_bins < extent:
                    params.n_bins = extent

                # These arrays will store the radial average info
                sums = flex.double(params.n_bins) * 0
                sums_sq = flex.double(params.n_bins) * 0
                counts = flex.int(params.n_bins) * 0

                if n_images is None:
                    all_data = img.get_raw_data()
                else:
                    all_data = img.get_raw_data(image_number)

                if not isinstance(all_data, tuple):
                    all_data = (all_data,)

                for tile, (panel, data) in enumerate(zip(detector, all_data)):
                    if params.mask is None:
                        mask = flex.bool(flex.grid(data.focus()), True)
                    else:
                        mask = params.mask[tile]

                    if hasattr(data, "as_double"):
                        data = data.as_double()

                    logger.flush()
                    if params.verbose:
                        logger.write("Average intensity tile %d: %9.3f\n" %
                                     (tile, flex.mean(data)))
                        logger.write("N bins: %d\n" % params.n_bins)
                        logger.flush()

                    x1, y1 = 0, 0
                    x2 = panel.get_image_size()[1]
                    y2 = panel.get_image_size()[0]
                    bc = panel.get_beam_centre_px(beam.get_s0())
                    bc = int(round(bc[1])), int(round(bc[0]))

                    # compute the average
                    radial_average(data, mask, bc, sums, sums_sq, counts,
                                   panel.get_pixel_size()[0],
                                   panel.get_distance(),
                                   (x1, y1), (x2, y2))

                # average the results, avoiding division by zero
                results = sums.set_selected(counts <= 0, 0)
                results /= counts.set_selected(counts <= 0, 1).as_double()

                if params.median_filter_size is not None:
                    logger.write("WARNING, the median filter is not fully propogated to the variances\n")
                    from scipy.ndimage.filters import median_filter
                    results = flex.double(
                        median_filter(results.as_numpy_array(),
                                      size=params.median_filter_size))

                # calculate standard devations
                stddev_sel = ((sums_sq - sums * results) >= 0) & (counts > 0)
                std_devs = flex.double(len(sums), 0)
                std_devs.set_selected(
                    stddev_sel,
                    (sums_sq.select(stddev_sel)
                     - sums.select(stddev_sel) * results.select(stddev_sel))
                    / counts.select(stddev_sel).as_double())
                std_devs = flex.sqrt(std_devs)

                twotheta = (flex.double(range(len(results)))
                            * extent_two_theta / params.n_bins)
                q_vals = (4 * math.pi * flex.sin(math.pi * twotheta / 360)
                          / beam.get_wavelength())

                if params.low_max_two_theta_limit is None:
                    subset = results
                else:
                    subset = results.select(
                        twotheta >= params.low_max_two_theta_limit)

                max_result = flex.max(subset)

                if params.x_axis == 'two_theta':
                    xvals = twotheta
                elif params.x_axis == 'q':
                    xvals = q_vals
                else:
                    raise Sorry("Unsupported x_axis '%s'" % params.x_axis)
                max_x = xvals[flex.first_index(results, max_result)]

                for i, r in enumerate(results):
                    val = xvals[i]
                    if params.output_bins and "%.3f" % r != "nan":
                        # .xye format for GSASII
                        logger.write("%9.3f %9.3f     %9.3f\n" %
                                     (val, r, std_devs[i]))
                logger.write("Maximum %s: %f, value: %f\n" %
                             (params.x_axis, max_x, max_result))

                if params.show_plots:
                    if params.plot_x_max is not None:
                        results = results.select(xvals <= params.plot_x_max)
                        xvals = xvals.select(xvals <= params.plot_x_max)
                    if params.normalize:
                        plt.plot(xvals.as_numpy_array(),
                                 (results / flex.max(results)).as_numpy_array(),
                                 '-')
                    else:
                        plt.plot(xvals.as_numpy_array(),
                                 results.as_numpy_array(), '-')
                    if params.x_axis == 'two_theta':
                        plt.xlabel("2 theta")
                    elif params.x_axis == 'q':
                        plt.xlabel("q")
                    plt.ylabel("Avg ADUs")
                    if params.plot_y_max is not None:
                        plt.ylim(0, params.plot_y_max)

        if params.show_plots:
            plt.show()
    finally:
        # Close the report only if it was opened here; never close stdout.
        if owns_logger:
            logger.close()

    return xvals, results