def cull_braggs(self, count):
    if count == 0:
        window = 0
    else:
        window = len(self.bragg_times) / count
    if count <= 0 or window < 1:
        self.culled_braggs = self.braggs
        self.culled_bragg_times = self.bragg_times
        self.culled_distances = self.distances
        self.culled_sifoils = self.sifoils
        self.culled_wavelengths = self.wavelengths
        self.culled_indexed = self.indexed
        return
    self.culled_braggs = flex.double()
    self.culled_bragg_times = flex.double()
    self.culled_distances = flex.double()
    self.culled_sifoils = flex.double()
    self.culled_wavelengths = flex.double()
    self.culled_indexed = flex.bool()
    for i in range(count):
        braggs = self.braggs[i * int(window):(i + 1) * int(window)]
        idx = int(i * window) + flex.max_index(braggs)
        self.culled_braggs.append(self.braggs[idx])
        self.culled_bragg_times.append(self.bragg_times[idx])
        self.culled_distances.append(self.distances[idx])
        self.culled_sifoils.append(self.sifoils[idx])
        self.culled_wavelengths.append(self.wavelengths[idx])
        self.culled_indexed.append(self.indexed[idx])
def finite_difference_test(self, g):
    """
    Run a basic gradient test: compare a numerical finite-difference estimate
    against the largest analytical gradient component, using
    t'(x) = (t(x+d) - t(x-d)) / (2d).

    Argument:
    g : gradient, flex array
    """
    if self.fmodel.r_work() > 1.e-3:
        g = g.as_double()
        d = 1.e-5
        # find the index of the max gradient value
        i_g_max = flex.max_index(flex.abs(g))
        x_d = self.x
        # calc t(x+d)
        x_d[i_g_max] = self.x[i_g_max] + d
        self.update_model_sites(x=x_d)
        self.fmodel.update_xray_structure(update_f_calc=True)
        t1, _ = self.compute_functional_and_gradients(compute_gradients=False)
        # calc t(x-d)
        x_d[i_g_max] = self.x[i_g_max] - d
        self.update_model_sites(x=x_d)
        del x_d
        self.fmodel.update_xray_structure(update_f_calc=True)
        t2, _ = self.compute_functional_and_gradients(compute_gradients=False)
        # Restore fmodel to the original coordinate values
        self.update_model_sites(x=self.x)
        self.fmodel.update_xray_structure(update_f_calc=True)
        self.buffer_max_grad.append(g[i_g_max])
        self.buffer_calc_grad.append((t1 - t2) / (d * 2))
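# A minimal, standalone sketch of the central-difference check used above,
# assuming only a scalar objective `func` and a flex.double parameter vector
# `x` (both hypothetical names, not part of the original code). It perturbs
# the component with the largest analytical gradient and compares
# (t(x+d) - t(x-d)) / (2d) against that component.
from scitbx.array_family import flex

def central_difference_check(func, x, g, d=1.e-5):
    # index of the largest analytical gradient component
    i = flex.max_index(flex.abs(g))
    x_plus = x.deep_copy()
    x_plus[i] += d
    x_minus = x.deep_copy()
    x_minus[i] -= d
    numerical = (func(x_plus) - func(x_minus)) / (2 * d)
    # return the discrepancy between analytical and numerical gradient
    return abs(g[i] - numerical)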
def _filter_maximum_centroid(self, coords, values, spots, cpos):
    """Filter the reflections by the distance between the maximum pixel
    value and the centroid position. If the centroid is further than the
    maximum separation from the maximum pixel (in pixel coords) then discard.

    Params:
        coords The list of coordinates
        values The list of values
        spots The list of pixel indices belonging to each spot
        cpos The list of centroids

    Returns:
        An index list of valid spots

    """
    from scitbx.array_family import flex
    from scitbx import matrix

    index = []
    for si, (s, c) in enumerate(zip(spots, cpos)):
        im = flex.max_index(flex.int([values[i] for i in s]))
        xc = matrix.col(c)
        xm = matrix.col(coords[s[im]])
        if (xc - xm).length() <= self._max_separation:
            index.append(si)

    # Return the list of indices
    return index
def reduce_raw_data(raw_data, qmax, bandwidth, level=0.05, q_background=None):
    print("delta_q is ", bandwidth)
    if qmax > raw_data.q[-1]:
        qmax = raw_data.q[-1]
    ### Get rid of noisy signal at very low q range ###
    qmin_indx = flex.max_index(raw_data.i)
    qmin = raw_data.q[qmin_indx]
    new_data = get_q_array_uniform_body(raw_data,
                                        q_min=qmin,
                                        q_max=qmax,
                                        level=level)
    qmax = new_data.q[-1]
    print("LEVEL=%f" % level, "and Q_MAX=%f" % qmax)
    raw_q = raw_data.q[qmin_indx:]
    raw_i = raw_data.i[qmin_indx:]
    raw_s = raw_data.s[qmin_indx:]
    ### Take care of the background (set zero at very high q) ###
    if (q_background is not None):
        cutoff = flex.bool(raw_q > q_background)
        q_bk_indx = flex.last_index(cutoff, False)
        if (q_bk_indx < raw_q.size()):
            bkgrd = flex.mean(raw_i[q_bk_indx:])
            print("Background correction: I=I-background, where background=", bkgrd)
            raw_i = flex.abs(raw_i - bkgrd)
    q = flex.double(range(int((qmax - qmin) / bandwidth) + 1)) * bandwidth + qmin
    raw_data.i = flex.linear_interpolation(raw_q, raw_i, q)
    raw_data.s = flex.linear_interpolation(raw_q, raw_s, q)
    raw_data.q = q
    return raw_data
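# A minimal sketch of the regridding step used above: build a uniform q grid
# with the requested bandwidth and interpolate intensities onto it with
# flex.linear_interpolation (the same call the function relies on). The
# arrays below are synthetic illustration data, not from the original code.
from scitbx.array_family import flex

raw_q = flex.double([0.01, 0.03, 0.07, 0.10, 0.16, 0.21])
raw_i = flex.double([9.0, 7.5, 5.0, 4.2, 3.1, 2.6])
bandwidth = 0.02
qmin, qmax = raw_q[0], raw_q[-1]
# uniform grid from qmin to qmax in steps of bandwidth
q_uniform = flex.double(range(int((qmax - qmin) / bandwidth) + 1)) * bandwidth + qmin
i_uniform = flex.linear_interpolation(raw_q, raw_i, q_uniform)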
def normalize(self, q, curve):
    last_height = flex.mean(curve[curve.size() - 10:curve.size() - 1])
    funct = q / flex.max(q) * last_height
    d = curve - funct
    max_index = flex.max_index(d)
    loc = q[max_index]
    scale = curve[max_index]
    return loc, scale
def _determine_dimensions(self):
    if self.params.dimensions is Auto and self.target.dim == 2:
        self.params.dimensions = 2
    elif self.params.dimensions is Auto:
        logger.info("=" * 80)
        logger.info(
            "\nAutomatic determination of number of dimensions for analysis"
        )
        dimensions = []
        functional = []
        termination_params = copy.deepcopy(self.params.termination_params)
        termination_params.max_iterations = min(
            20, termination_params.max_iterations
        )
        for dim in range(1, self.target.dim + 1):
            logger.debug("Testing dimension: %i", dim)
            self.target.set_dimensions(dim)
            self._optimise(termination_params)
            dimensions.append(dim)
            functional.append(self.minimizer.f)

        # Find the elbow point of the curve, in the same manner as that used by
        # distl spotfinder for resolution method 1 (Zhang et al 2006).
        # See also dials/algorithms/spot_finding/per_image_analysis.py
        x = flex.double(dimensions)
        y = flex.double(functional)
        slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
        p_m = flex.min_index(slopes)
        x1 = matrix.col((x[p_m], y[p_m]))
        x2 = matrix.col((x[-1], y[-1]))

        gaps = flex.double()
        v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

        for i in range(p_m, len(x)):
            x0 = matrix.col((x[i], y[i]))
            r = x1 - x0
            g = abs(v.dot(r))
            gaps.append(g)

        p_g = flex.max_index(gaps)

        x_g = x[p_g + p_m]

        logger.info(
            dials.util.tabulate(
                zip(dimensions, functional), headers=("Dimensions", "Functional")
            )
        )
        logger.info("Best number of dimensions: %i" % x_g)
        self.target.set_dimensions(int(x_g))
        logger.info("Using %i dimensions for analysis" % self.target.dim)
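# A self-contained sketch of the elbow-point selection used above and in
# several other functions below: after the point of steepest descent, pick
# the point whose perpendicular distance to the chord joining it to the last
# point is largest. The x/y values in the usage note are synthetic
# illustration data only.
from scitbx.array_family import flex
from scitbx import matrix

def elbow_point(x, y):
    slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
    p_m = flex.min_index(slopes)
    x1 = matrix.col((x[p_m], y[p_m]))
    x2 = matrix.col((x[-1], y[-1]))
    # unit vector perpendicular to the chord x1 -> x2
    v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()
    gaps = flex.double()
    for i in range(p_m, len(x)):
        x0 = matrix.col((x[i], y[i]))
        gaps.append(abs(v.dot(x1 - x0)))
    return x[flex.max_index(gaps) + p_m]

# e.g. elbow_point(flex.double([1.0, 2.0, 3.0, 4.0, 5.0]),
#                  flex.double([100.0, 40.0, 20.0, 18.0, 17.0]))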
def __init__(O, points, epsilon=1e-2): """\ Computation of Minimum-Volume Covering Ellipsoid using the Khachiyan Algorithm. Based on a Python implementation by Raj Rajashankar (ANL, Nov 2011), which in turn was based on a Matlab script by Nima Moshtagh (2009, http://stackoverflow.com/questions/1768197/bounding-ellipse/1768440#1768440). Caveats: - center and radii are correct, but rotation may permute axes - scales with the square of the number of points """ d = 3 # d is the dimension n = points.size() assert n > 0 # from scitbx.array_family import flex p = points.as_double() p.reshape(flex.grid(n, 3)) p = p.matrix_transpose() q = p.deep_copy() q.resize(flex.grid(4, n), 1) # u = flex.double(n, 1/n) umx = flex.double(flex.grid(n, n), 0) # err = epsilon + 1 while (err > epsilon): umx.matrix_diagonal_set_in_place(u) x_inv = q.matrix_multiply(umx).matrix_multiply_transpose( q).matrix_inversion() m = q.matrix_transpose_multiply( x_inv).matrix_multiply(q).matrix_diagonal() j = flex.max_index(m) maximum = m[j] ascent_step_size = (maximum-d-1)/((d+1)*(maximum-1)) new_u = (1 - ascent_step_size) * u new_u[j] += ascent_step_size err = flex.sum_sq(new_u - u)**0.5 u = new_u # center = p.matrix_multiply(u) umx.matrix_diagonal_set_in_place(u) t1 = p.matrix_multiply(umx).matrix_multiply_transpose(p) t2 = center.matrix_outer_product(center) a = (t1 - t2).matrix_inversion() / d # import scitbx.linalg.svd svd = scitbx.linalg.svd.real(a, accumulate_u=False, accumulate_v=True) size = 1.0/flex.sqrt(svd.sigma) from scitbx import matrix O.center = matrix.col(center) O.radii = matrix.col(size) O.rotation = matrix.sqr(svd.v)
def plot_centroid_weights_histograms(reflections, n_slots=50):
    from matplotlib import pyplot
    from scitbx.array_family import flex

    variances = flex.vec3_double([r.centroid_variance for r in reflections])
    vx, vy, vz = variances.parts()
    idx = (vx > 0).__and__(vy > 0).__and__(vz > 0)
    vx = vx.select(idx)
    vy = vy.select(idx)
    vz = vz.select(idx)
    wx = 1 / vx
    wy = 1 / vy
    wz = 1 / vz
    wx = flex.log(wx)
    wy = flex.log(wy)
    wz = flex.log(wz)
    hx = flex.histogram(wx, n_slots=n_slots)
    hy = flex.histogram(wy, n_slots=n_slots)
    hz = flex.histogram(wz, n_slots=n_slots)

    fig = pyplot.figure()

    idx2 = flex.max_index(wx)
    idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
    print(reflections[idx3])
    return

    # outliers = reflections.select(wx > 50)
    # for refl in outliers:
    #     print refl

    for i, h in enumerate([hx, hy, hz]):
        ax = fig.add_subplot(311 + i)

        slots = h.slots().as_double()
        bins, data = hist_outline(h)
        log_scale = True
        if log_scale:
            data.set_selected(
                data == 0, 0.1
            )  # otherwise lines don't get drawn when we have some empty bins
            ax.set_yscale("log")
        ax.plot(bins, data, "-k", linewidth=2)
        # pyplot.suptitle(title)
        data_min = min([slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
        data_max = max([slot.low_cutoff for slot in h.slot_infos() if slot.n > 0])
        ax.set_xlim(data_min, data_max + h.slot_width())
    pyplot.show()
def fit_3_gaussians(self, histogram):
    fitted_gaussians = []
    low_idx = self.work_params.fit_limits[0]
    high_idx = self.work_params.fit_limits[1]
    slot_centers = flex.double(
        range(self.work_params.first_slot_value,
              self.work_params.first_slot_value + len(histogram)))
    free_x = slot_centers[low_idx:high_idx]
    slots = flex.double(histogram.astype(np.float64))
    free_y = slots[low_idx:high_idx]
    total_population = flex.sum(free_y)

    # zero_mean = 0.  # originally intended mean=0
    maxidx = flex.max_index(
        free_y)  # but if dark subtraction (pedestal correction) is off
    zero_mean = free_x[maxidx]  # use this non-zero maximum instead
    zero_amplitude = flex.max(free_y)
    assert 1. / zero_amplitude  # guard against division by zero
    zero_sigma = self.work_params.gaussian_3.zero_sigma
    inelastic_amplitude = 0.001
    elastic_amplitude = 0.001
    elastic_sigma = self.work_params.gaussian_3.zero_sigma
    #elastic_mean = zero_mean + zero_sigma*self.work_params.gaussian_3.elastic_gain_to_sigma

    #**
    helper = self.helper_3_gaussian_factory(
        initial_estimates=(zero_mean, 1.0, zero_sigma, inelastic_amplitude,
                           elastic_amplitude, elastic_sigma),
        constants=flex.double((
            self.work_params.gaussian_3.inelastic_gain_to_sigma,
            self.work_params.gaussian_3.elastic_gain_to_sigma,
        )),
        free_x=free_x,
        free_y=free_y / zero_amplitude
    )  # put y values on 0->1 scale for normal eqn solving
    helper.restart()
    iterations = normal_eqns_solving.levenberg_marquardt_iterations(
        non_linear_ls=helper,
        n_max_iterations=7,
        gradient_threshold=1.E-3)
    fitted_gaussians = helper.as_gaussians()
    for item in fitted_gaussians:
        item.params = (item.params[0] * zero_amplitude, item.params[1],
                       item.params[2])  # convert back to full scale
    return fitted_gaussians
def get_stats(self):
    # find median energy:
    cumulative_total = flex.sum(self.corrected_response)
    max_index = flex.max_index(self.corrected_response)
    self.mode = self.deduced_energy[max_index]
    half_cum = cumulative_total / 2.
    running_cum = 0
    for idx in range(len(self.deduced_energy)):
        running_cum += self.corrected_response[idx]
        if running_cum > half_cum:
            break
    self.median = self.deduced_energy[idx]
    self.max_response = flex.max(self.corrected_response)
    n_channels_fwhm = (self.corrected_response >
                       (0.5 * self.max_response)).count(True)
    self.fwhm_eV = n_channels_fwhm * 10.  # 10 eV/channel
    delta_E_over_E = self.fwhm_eV / self.median
    print("delta E / E = %.5f" % delta_E_over_E)
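# A minimal sketch of the same spectrum statistics (mode, median, FWHM) on a
# synthetic response curve; all array names and values here are illustrative
# only, and the channels in this toy example are 1 eV wide rather than the
# 10 eV noted in the method above.
from scitbx.array_family import flex

energy = flex.double(range(9000, 9100))  # eV, one value per channel
response = flex.double([max(0.0, 50.0 - abs(e - 9050.0)) for e in energy])

mode = energy[flex.max_index(response)]           # energy of the peak channel
half_cum = flex.sum(response) / 2.
running = 0.0
for idx in range(len(energy)):
    running += response[idx]
    if running > half_cum:
        break
median = energy[idx]                              # energy splitting the area in half
fwhm_eV = (response > 0.5 * flex.max(response)).count(True) * 1.0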
def _determine_dimensions(self):
    if self.params.dimensions is Auto and self.target.dim == 2:
        self.params.dimensions = 2
    elif self.params.dimensions is Auto:
        dimensions = []
        functional = []
        explained_variance = []
        explained_variance_ratio = []
        for dim in range(1, self.target.dim + 1):
            self.target.set_dimensions(dim)
            self._optimise()
            logger.info("Functional: %g" % self.minimizer.f)
            self._principal_component_analysis()

            dimensions.append(dim)
            functional.append(self.minimizer.f)
            explained_variance.append(self.explained_variance)
            explained_variance_ratio.append(self.explained_variance_ratio)

        # Find the elbow point of the curve, in the same manner as that used by
        # distl spotfinder for resolution method 1 (Zhang et al 2006).
        # See also dials/algorithms/spot_finding/per_image_analysis.py
        x = flex.double(dimensions)
        y = flex.double(functional)
        slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
        p_m = flex.min_index(slopes)
        x1 = matrix.col((x[p_m], y[p_m]))
        x2 = matrix.col((x[-1], y[-1]))

        gaps = flex.double()
        v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

        for i in range(p_m, len(x)):
            x0 = matrix.col((x[i], y[i]))
            r = x1 - x0
            g = abs(v.dot(r))
            gaps.append(g)

        p_g = flex.max_index(gaps)

        x_g = x[p_g + p_m]

        logger.info("Best number of dimensions: %i" % x_g)
        self.target.set_dimensions(int(x_g))
def fit_one_histogram_two_gaussians(self, histogram):
    fitted_gaussians = []
    GAIN_TO_SIGMA = self.work_params.fudge_factor.gain_to_sigma
    low_idx = self.work_params.fit_limits[0]
    high_idx = self.work_params.fit_limits[1]
    slot_centers = flex.double(
        range(self.work_params.first_slot_value,
              self.work_params.first_slot_value + len(histogram)))
    free_x = slot_centers[low_idx:high_idx]
    #print list(free_x)
    slots = flex.double(histogram.astype(np.float64))
    free_y = slots[low_idx:high_idx]

    # zero_mean = 0.  # originally intended mean=0
    maxidx = flex.max_index(
        free_y)  # but if dark subtraction (pedestal correction) is off
    zero_mean = free_x[maxidx]  # use this non-zero maximum instead
    zero_amplitude = flex.max(free_y)
    assert 1. / zero_amplitude  # guard against division by zero
    total_population = flex.sum(free_y)
    zero_sigma = self.work_params.estimated_gain / GAIN_TO_SIGMA
    one_amplitude = 0.001
    helper = self.per_pixel_helper_factory(
        initial_estimates=(zero_mean, 1.0, zero_sigma, one_amplitude),
        GAIN_TO_SIGMA=GAIN_TO_SIGMA,
        free_x=free_x,
        free_y=free_y / zero_amplitude
    )  # put y values on 0->1 scale for normal eqn solving
    helper.restart()
    iterations = normal_eqns_solving.levenberg_marquardt_iterations(
        non_linear_ls=helper,
        n_max_iterations=7,
        gradient_threshold=1.E-3)
    print("current values after iterations", list(helper.x), end=' ')
    fitted_gaussians = helper.as_gaussians()
    for item in fitted_gaussians:
        item.params = (item.params[0] * zero_amplitude, item.params[1],
                       item.params[2])  # convert back to full scale
    return fitted_gaussians
def __init__(self, x_obs, y_obs):
    """Fit a gaussian to points (x_obs, y_obs):
         f(x) = A exp(-(x - mu)**2 / (2 * sigma**2))

    :param x_obs: x-coordinates of the data
    :type x_obs: flex.double
    :param y_obs: y-coordinates of the data
    :type y_obs: flex.double
    """
    self.x_obs = x_obs
    self.y_obs = y_obs
    max_i = flex.max_index(y_obs)
    # quick estimate of scale and mean to give the optimiser a helping hand
    scale = y_obs[max_i]
    mu = x_obs[max_i]
    sigma = 1  # can we make a simple estimate of sigma too?
    fit = gaussian_fit(x_obs, y_obs, [gaussian(scale, mu, sigma)])
    self.a = fit.gaussians[0].a
    self.b = fit.gaussians[0].b
    self.c = fit.gaussians[0].c
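# A small usage sketch for the single-gaussian case, assuming the fit comes
# from scitbx.math.curve_fitting.single_gaussian_fit (the same call and
# attribute names, fit.a/fit.b/fit.c, appear in the common_mode example
# below); the synthetic peak here is illustration data only.
import math
from scitbx.array_family import flex
from scitbx.math.curve_fitting import single_gaussian_fit

x = flex.double([0.1 * i for i in range(100)])
y = flex.double([3.0 * math.exp(-(xi - 5.0) ** 2 / (2 * 0.8 ** 2)) for xi in x])

fit = single_gaussian_fit(x, y)
scale, mu, sigma = fit.a, fit.b, fit.c  # amplitude, mean, width of the fitted peak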
def reduce_raw_data(raw_data, qmax, bandwidth, level=0.05, q_background=None):
    print()
    print(" ==== Data reduction ==== ")
    print()
    print(" Preprocessing of data increases efficiency of shape retrieval procedure.")
    print()
    print(" - Interpolation stepsize : %4.3e" % bandwidth)
    print(" - Uniform density criteria: level is set to : %4.3e" % level)
    print("   maximum q to consider : %4.3e" % qmax)
    qmin_indx = flex.max_index(raw_data.i)
    qmin = raw_data.q[qmin_indx]
    new_data = get_q_array_uniform_body(raw_data,
                                        q_min=qmin,
                                        q_max=qmax,
                                        level=level)
    qmax = new_data.q[-1]
    if qmax > raw_data.q[-1]:
        qmax = raw_data.q[-1]
    print(" Resulting q range to use in search: q start : %4.3e" % qmin)
    print("                                     q stop  : %4.3e" % qmax)
    print()
    raw_q = raw_data.q[qmin_indx:]
    raw_i = raw_data.i[qmin_indx:]
    raw_s = raw_data.s[qmin_indx:]
    ### Take care of the background (set zero at very high q) ###
    if (q_background is not None):
        cutoff = flex.bool(raw_q > q_background)
        q_bk_indx = flex.last_index(cutoff, False)
        if (q_bk_indx < raw_q.size()):
            bkgrd = flex.mean(raw_i[q_bk_indx:])
            print("Background correction: I=I-background, where background=", bkgrd)
            raw_i = flex.abs(raw_i - bkgrd)
    q = flex.double(range(int((qmax - qmin) / bandwidth) + 1)) * bandwidth + qmin
    raw_data.i = flex.linear_interpolation(raw_q, raw_i, q)
    raw_data.s = flex.linear_interpolation(raw_q, raw_s, q)
    raw_data.q = q
    return raw_data
def finite_difference_test(self, g):
    """
    Compare analytical and finite-difference gradients.
    finite_grad_difference_val = abs(analytical - finite difference)
    """
    g = g.as_double()
    # find the index of the max gradient value
    i_g_max = flex.max_index(flex.abs(g))
    # Set displacement for finite gradient calculation
    d = max(self.x[i_g_max] * 1e-6, 1e-6)
    # calc t(x+d)
    self.x[i_g_max] += d
    t1, _ = self.compute_functional_and_gradients(compute_gradients=False)
    # calc t(x-d)
    self.x[i_g_max] -= 2 * d
    t2, _ = self.compute_functional_and_gradients(compute_gradients=False)
    # Restore the model to the original coordinate values
    self.x[i_g_max] += d
    self.update_xray_structure()
    finite_grad = (t1 - t2) / (d * 2)
    self.finite_grad_difference_val = abs(g[i_g_max] - finite_grad)
def common_mode(self, img, stddev, mask): """The common_mode() function returns the mode of image stored in the array pointed to by @p img. @p mask must be such that the @p stddev at the selected pixels is greater than zero. @param img 2D integer array of the image @param stddev 2D integer array of the standard deviation of each pixel in @p img @param mask 2D Boolean array, @c True if the pixel is to be included, @c False otherwise @return Mode of the image, as a real number """ # Flatten the image and take out inactive pixels XXX because we # cannot take means and medians of 2D arrays? img_1d = img.as_1d().select(mask.as_1d()).as_double() assert img_1d.size() > 0 if (self.common_mode_correction == "mean"): # The common mode is approximated by the mean of the pixels with # signal-to-noise ratio less than a given threshold. XXX Breaks # if the selection is empty! THRESHOLD_SNR = 2 img_snr = img_1d / stddev.as_double().as_1d().select(mask.as_1d()) return (flex.mean(img_1d.select(img_snr < THRESHOLD_SNR))) elif (self.common_mode_correction == "median"): return (flex.median(img_1d)) # Identify the common-mode correction as the peak histogram of the # histogram of pixel values (the "standard" common-mode correction, as # previously implemented in this class). hist_min = -40 hist_max = 40 n_slots = 100 hist = flex.histogram(img_1d, hist_min, hist_max, n_slots=n_slots) slots = hist.slots() i = flex.max_index(slots) common_mode = list(hist.slot_infos())[i].center() if (self.common_mode_correction == "mode"): return (common_mode) # Determine the common-mode correction from the peak of a single # Gaussian function fitted to the histogram. from scitbx.math.curve_fitting import single_gaussian_fit x = hist.slot_centers() y = slots.as_double() fit = single_gaussian_fit(x, y) scale, mu, sigma = fit.a, fit.b, fit.c self.logger.debug("fitted gaussian: mu=%.3f, sigma=%.3f" %(mu, sigma)) mode = common_mode common_mode = mu if abs(mode-common_mode) > 1000: common_mode = mode # XXX self.logger.debug("delta common mode corrections: %.3f" %(mode-common_mode)) if 0 and abs(mode-common_mode) > 0: #if 0 and skew > 0.5: # view histogram and fitted gaussian from numpy import exp from matplotlib import pyplot x_all = x n, bins, patches = pyplot.hist(section_img.as_1d().as_numpy_array(), bins=n_slots, range=(hist_min, hist_max)) y_all = scale * flex.exp(-flex.pow2(x_all-mu) / (2 * sigma**2)) scale = slots[flex.max_index(slots)] y_all *= scale/flex.max(y_all) pyplot.plot(x_all, y_all) pyplot.show() return (common_mode)
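# A minimal sketch of the histogram-peak ("mode") branch of common_mode()
# above: bin the pixel values and take the centre of the fullest slot. The
# pixel values below are synthetic, and the binning range and slot count
# simply mirror the constants used in that method.
from scitbx.array_family import flex

pixels = flex.double([-3, -2, -1, 0, 0, 0, 1, 1, 2, 35])
hist = flex.histogram(pixels, -40, 40, n_slots=100)
i_peak = flex.max_index(hist.slots())
common_mode = list(hist.slot_infos())[i_peak].center()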
def _silhouette_analysis(self, cluster_labels, linkage_matrix, n_clusters, min_silhouette_score): """Compare valid equal-sized clustering using silhouette scores. Args: cluster_labels (scitbx.array_family.flex.int): linkage_matrix (numpy.ndarray): The hierarchical clustering of centroids of the initial clustering as produced by :func:`scipy.cluster.hierarchy.linkage`. n_clusters (int): Optionally override the automatic determination of the number of clusters. min_silhouette_score (float): The minimum silhouette score to be used in automatic determination of the number of clusters. Returns: cluster_labels (scitbx.array_family.flex.int): A label for each coordinate. """ eps = 1e-6 X = self.coords.as_numpy_array() cluster_labels_input = cluster_labels distances = linkage_matrix[::, 2] distances = np.insert(distances, 0, 0) silhouette_scores = flex.double() thresholds = flex.double() threshold_n_clusters = flex.size_t() for threshold in distances[1:]: cluster_labels = cluster_labels_input.deep_copy() labels = hierarchy.fcluster(linkage_matrix, threshold - eps, criterion="distance").tolist() counts = [labels.count(l) for l in set(labels)] if len(set(counts)) > 1: # only equal-sized clusters are valid continue n = len(set(labels)) if n == 1: continue elif n_clusters is not Auto and n != n_clusters: continue for i in range(len(labels)): cluster_labels.set_selected(cluster_labels_input == i, int(labels[i] - 1)) if len(set(cluster_labels)) == X.shape[0]: # silhouette coefficient not defined if 1 dataset per cluster # not sure what the default value should be sample_silhouette_values = np.full(cluster_labels.size(), 0) else: # Compute the silhouette scores for each sample sample_silhouette_values = metrics.silhouette_samples( X, cluster_labels.as_numpy_array(), metric="cosine") silhouette_avg = sample_silhouette_values.mean() silhouette_scores.append(silhouette_avg) thresholds.append(threshold) threshold_n_clusters.append(n) count_negative = (sample_silhouette_values < 0).sum() logger.info("Clustering:") logger.info(" Number of clusters: %i" % n) logger.info(" Threshold score: %.3f (%.1f deg)" % (threshold, math.degrees(math.acos(1 - threshold)))) logger.info(" Silhouette score: %.3f" % silhouette_avg) logger.info(" -ve silhouette scores: %.1f%%" % (100 * count_negative / sample_silhouette_values.size)) if n_clusters is Auto: idx = flex.max_index(silhouette_scores) else: idx = flex.first_index(threshold_n_clusters, n_clusters) if idx is None: raise Sorry("No valid clustering with %i clusters" % n_clusters) if n_clusters is Auto and silhouette_scores[idx] < min_silhouette_score: # assume single cluster cluster_labels = flex.int(cluster_labels.size(), 0) else: threshold = thresholds[idx] - eps labels = hierarchy.fcluster(linkage_matrix, threshold, criterion="distance") cluster_labels = flex.double(cluster_labels.size(), -1) for i in range(len(labels)): cluster_labels.set_selected(cluster_labels_input == i, float(labels[i] - 1)) return cluster_labels, threshold
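# A condensed sketch of the selection logic above: try each merge threshold
# from the linkage matrix, score the resulting labelling with the mean
# silhouette coefficient, and keep the best-scoring clustering. This is a
# simplification (euclidean rather than cosine distances, no equal-size
# check), and the 2-D points are synthetic illustration data only.
import numpy as np
from scipy.cluster import hierarchy
from sklearn import metrics

X = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1],
              [5.0, 5.0], [5.1, 5.0], [5.0, 5.1]])
linkage_matrix = hierarchy.linkage(X, method="average")

best = None
for threshold in linkage_matrix[:, 2]:
    labels = hierarchy.fcluster(linkage_matrix, threshold - 1e-6, criterion="distance")
    if len(set(labels)) in (1, len(X)):
        continue  # silhouette score undefined for 1 cluster or 1 point per cluster
    score = metrics.silhouette_score(X, labels)
    if best is None or score > best[0]:
        best = (score, labels)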
def run(args): master_phil = iotbx.phil.parse(master_phil_str) processed = iotbx.phil.process_command_line(args=args, master_string=master_phil_str) args = processed.remaining_args work_params = processed.work.extract() x_offsets = work_params.x_offsets bg_range_min, bg_range_max = work_params.bg_range if work_params.plot_range is not None: x_min, x_max = work_params.plot_range else: x_min, x_max = (0, 385) print bg_range_min, bg_range_max if x_offsets is None: x_offsets = [0] * len(args) legend = work_params.legend linewidth = 2 fontsize = 26 xy_pairs = [] colours = [ "cornflowerblue", "darkmagenta", "darkgreen", "black", "red", "blue", "pink" ] colours[2] = "orangered" colours[1] = "olivedrab" min_background = 1e16 #x_min, x_max = (0, 391) #x_min, x_max = (0, 360) #x_min, x_max = (200, 360) for i, filename in enumerate(args): print filename f = open(filename, 'rb') x, y = zip(*[ line.split() for line in f.readlines() if not line.startswith("#") ]) x = flex.double(flex.std_string(x)) y = flex.double(flex.std_string(y)) if work_params.smoothing.method is not None: savitzky_golay_half_window = work_params.smoothing.savitzky_golay.half_window savitzky_golay_degree = work_params.smoothing.savitzky_golay.degree fourier_cutoff = work_params.smoothing.fourier_filter_cutoff method = work_params.smoothing.method if method == "fourier_filter": assert work_params.smoothing.fourier_filter_cutoff is not None if method == "savitzky_golay": x, y = smoothing.savitzky_golay_filter( x, y, savitzky_golay_half_window, savitzky_golay_degree) elif method == "fourier_filter": x, y = smooth_spectrum.fourier_filter( x, y, cutoff_frequency=fourier_cutoff) x += x_offsets[i] y = y.select((x <= x_max) & (x > 0)) x = x.select((x <= x_max) & (x > 0)) bg_sel = (x > bg_range_min) & (x < bg_range_max) xy_pairs.append((x, y)) min_background = min(min_background, flex.mean(y.select(bg_sel)) / flex.max(y)) y -= min_background print "Peak maximum at: %i" % int(x[flex.max_index(y)]) for i, filename in enumerate(args): if legend is None: label = filename else: print legend assert len(legend) == len(args) label = legend[i] x, y = xy_pairs[i] if i == -1: x, y = interpolate(x, y) x, y = savitzky_golay_filter(x, y) #if i == 0: #y -= 10 bg_sel = (x > bg_range_min) & (x < bg_range_max) y -= (flex.mean(y.select(bg_sel)) - min_background * flex.max(y)) #y -= flex.min(y) y_min = flex.min(y.select(bg_sel)) if i == -2: y += 0.2 * flex.max(y) print "minimum at: %i" % int(x[flex.min_index(y)]), flex.min(y) #print "fwhm: %.2f" %full_width_half_max(x, y) y /= flex.max(y) if len(colours) > i: pyplot.plot(x, y, label=label, linewidth=linewidth, color=colours[i]) else: pyplot.plot(x, y, label=label, linewidth=linewidth) pyplot.ylabel("Intensity", fontsize=fontsize) pyplot.xlabel("Pixel column", fontsize=fontsize) if i > 0: # For some reason the line below causes a floating point error if we only # have one plot (i.e. i==0) legend = pyplot.legend(loc=2) for t in legend.get_texts(): t.set_fontsize(fontsize) axes = pyplot.axes() for tick in axes.xaxis.get_ticklabels(): tick.set_fontsize(20) for tick in axes.yaxis.get_ticklabels(): tick.set_fontsize(20) pyplot.ylim(0, 1) pyplot.xlim(x_min, x_max) ax = pyplot.axes() #ax.xaxis.set_minor_locator(pyplot.MultipleLocator(5)) #ax.yaxis.set_major_locator(pyplot.MultipleLocator(0.1)) #ax.yaxis.set_minor_locator(pyplot.MultipleLocator(0.05)) pyplot.show()
def estimate_global_threshold(image, mask=None): from scitbx.array_family import flex from scitbx import matrix n_above_threshold = flex.size_t() threshold = flex.double() for i in range(1, 20): g = 1.5**i g = int(g) n_above_threshold.append((image > g).count(True)) threshold.append(g) # Find the elbow point of the curve, in the same manner as that used by # distl spotfinder for resolution method 1 (Zhang et al 2006). # See also dials/algorithms/spot_finding/per_image_analysis.py x = threshold.as_double() y = n_above_threshold.as_double() slopes = (y[-1] - y[:-1])/(x[-1] - x[:-1]) p_m = flex.min_index(slopes) x1 = matrix.col((x[p_m], y[p_m])) x2 = matrix.col((x[-1], y[-1])) gaps = flex.double() v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize() for i in range(p_m, len(x)): x0 = matrix.col((x[i], y[i])) r = x1 - x0 g = abs(v.dot(r)) gaps.append(g) mv = flex.mean_and_variance(gaps) s = mv.unweighted_sample_standard_deviation() p_k = flex.max_index(gaps) g_k = gaps[p_k] p_g = p_k #x_g = x[p_g + p_m] #y_g = y[p_g + p_m] #x_g = x[p_g + p_m -1] #y_g = y[p_g + p_m -1] # more conservative, choose point 2 left of the elbow point x_g = x[p_g + p_m -2] y_g = y[p_g + p_m -2] #from matplotlib import pyplot #pyplot.scatter(threshold, n_above_threshold) ##for i in range(len(threshold)-1): ##pyplot.plot([threshold[i], threshold[-1]], ##[n_above_threshold[i], n_above_threshold[-1]]) ##for i in range(1, len(threshold)): ##pyplot.plot([threshold[0], threshold[i]], ##[n_above_threshold[0], n_above_threshold[i]]) #pyplot.plot( #[threshold[p_m], threshold[-1]], [n_above_threshold[p_m], n_above_threshold[-1]]) #pyplot.plot( #[x_g, threshold[-1]], [y_g, n_above_threshold[-1]]) #pyplot.show() return x_g
def estimate_global_threshold(image, mask=None, plot=False): n_above_threshold = flex.size_t() threshold = flex.double() for i in range(1, 20): g = 1.5**i g = int(g) n_above_threshold.append((image > g).count(True)) threshold.append(g) # Find the elbow point of the curve, in the same manner as that used by # distl spotfinder for resolution method 1 (Zhang et al 2006). # See also dials/algorithms/spot_finding/per_image_analysis.py x = threshold.as_double() y = n_above_threshold.as_double() slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1]) p_m = flex.min_index(slopes) x1 = matrix.col((x[p_m], y[p_m])) x2 = matrix.col((x[-1], y[-1])) gaps = flex.double() v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize() for i in range(p_m, len(x)): x0 = matrix.col((x[i], y[i])) r = x1 - x0 g = abs(v.dot(r)) gaps.append(g) p_g = flex.max_index(gaps) x_g_ = x[p_g + p_m] y_g_ = y[p_g + p_m] # more conservative, choose point 2 left of the elbow point x_g = x[p_g + p_m - 2] # y_g = y[p_g + p_m - 2] if plot: from matplotlib import pyplot pyplot.figure(figsize=(16, 12)) pyplot.scatter(threshold, n_above_threshold, marker="+") # for i in range(len(threshold)-1): # pyplot.plot([threshold[i], threshold[-1]], # [n_above_threshold[i], n_above_threshold[-1]]) # for i in range(1, len(threshold)): # pyplot.plot([threshold[0], threshold[i]], # [n_above_threshold[0], n_above_threshold[i]]) pyplot.plot([x_g, x_g], pyplot.ylim()) pyplot.plot( [threshold[p_m], threshold[-1]], [n_above_threshold[p_m], n_above_threshold[-1]], ) pyplot.plot([x_g_, threshold[-1]], [y_g_, n_above_threshold[-1]]) pyplot.xlabel("Threshold") pyplot.ylabel("Number of pixels above threshold") pyplot.savefig("global_threshold.png") pyplot.clf() return x_g
def fit_one_histogram(self, pixel, n_gaussians=2): histogram = self.histograms[pixel] fitted_gaussians = [] slot_centers = histogram.slot_centers() slots = histogram.slots().as_double() zero_peak_gaussian = None for i in range(n_gaussians): if i == 0: lower_threshold = -1000 upper_threshold = 0.4 * self.estimated_gain mean = 0 fit = self.single_peak_fit( histogram, lower_threshold, upper_threshold, mean, zero_peak_gaussian=zero_peak_gaussian) hist_max = flex.max(histogram.slots()) if abs(fit.functions[0].params[0] - hist_max) / hist_max > 0.1: upper_threshold = 0.3 * self.estimated_gain fit = self.single_peak_fit( histogram, lower_threshold, upper_threshold, mean, zero_peak_gaussian=zero_peak_gaussian) else: y_obs = histogram.slots().as_double() x = histogram.slot_centers() y_calc = flex.double(y_obs.size(), 0) for g in fitted_gaussians: y_calc += g(x) residual = y_obs - y_calc # triangular smoothing of residual to find peak position residual = sliding_average(residual) residual = sliding_average(residual) for n in (4, 5, 6, 7, 8): #for n in (5, 6, 7, 8): # we assume that the peaks are separated by at least n sigma n_sigma = abs(n * fitted_gaussians[0].params[2]) slot_i = histogram.get_i_slot( fitted_gaussians[i - 1].params[1] + n_sigma) max_slot_i = flex.max_index(residual[slot_i:]) + slot_i mean = slot_centers[max_slot_i] lower_threshold = mean - 0.3 * ( mean - fitted_gaussians[0].params[1]) upper_threshold = mean + 0.4 * ( mean - fitted_gaussians[0].params[1]) #print lower_threshold, mean, upper_threshold #zero_peak_gaussian = None fit = self.single_peak_fit( histogram, lower_threshold, upper_threshold, mean, zero_peak_gaussian=zero_peak_gaussian) if (fit.functions[0].params[1] > fitted_gaussians[-1].params[1] and fit.functions[0].sigma > 0.5 * fitted_gaussians[-1].sigma and (fit.functions[0].params[1] - fitted_gaussians[-1].params[1]) > n_sigma): break fitted_gaussians += fit.functions if i == 0: zero_peak_gaussian = fit.functions[0] if len(fitted_gaussians) > 1: try: check_pixel_histogram_fit(histogram, fitted_gaussians) except PixelFitError, e: print "PixelFitError:", str(e) gain = fitted_gaussians[1].params[1] - fitted_gaussians[0].params[1] print "gain: %s" % gain zero_peak = fitted_gaussians[0].params[1] photon_threshold = 2 / 3 n_single_photons = flex.sum( histogram.slots()[histogram.get_i_slot(photon_threshold * gain + zero_peak):]) n_double_photons = flex.sum( histogram.slots()[histogram.get_i_slot((1 + photon_threshold) * gain + zero_peak):]) n_single_photons -= n_double_photons print "n_single_photons: %i" % n_single_photons print "n_double_photons: %i" % n_double_photons
def seed_clustering(self): eps = 1e-6 X_orig = self.coords.as_numpy_array() import numpy as np from scipy.cluster import hierarchy import scipy.spatial.distance as ssd from sklearn.neighbors import NearestNeighbors from sklearn import metrics # initialise cluster labels: -1 signifies doesn't belong to a cluster self.cluster_labels = flex.int(self.coords.all()[0], -1) cluster_id = 0 while self.cluster_labels.count(-1) > 0: dataset_ids = (flex.int_range( len(self.datasets) * len(self.target.get_sym_ops())) % len(self.datasets)).as_numpy_array() coord_ids = flex.int_range(dataset_ids.size).as_numpy_array() # select only those points that don't already belong to a cluster sel = np.where(self.cluster_labels == -1) X = X_orig[sel] dataset_ids = dataset_ids[sel] coord_ids = coord_ids[sel] # choose a high density point as seed for cluster nbrs = NearestNeighbors(n_neighbors=min(11, len(X)), algorithm='brute', metric='cosine').fit(X) distances, indices = nbrs.kneighbors(X) average_distance = flex.double( [dist[1:].mean() for dist in distances]) i = flex.min_index(average_distance) d_id = dataset_ids[i] cluster = np.array([coord_ids[i]]) cluster_dataset_ids = np.array([d_id]) xis = np.array([X[i]]) for j in range(len(self.datasets) - 1): # select only those rows that don't correspond to a dataset already # present in current cluster sel = np.where(dataset_ids != d_id) X = X[sel] dataset_ids = dataset_ids[sel] coord_ids = coord_ids[sel] assert len(X) > 0 # Find nearest neighbour in cosine-space to the current cluster centroid nbrs = NearestNeighbors(n_neighbors=min(1, len(X)), algorithm='brute', metric='cosine').fit(X) distances, indices = nbrs.kneighbors([xis.mean(axis=0)]) k = indices[0][0] d_id = dataset_ids[k] cluster = np.append(cluster, coord_ids[k]) cluster_dataset_ids = np.append(cluster_dataset_ids, d_id) xis = np.append(xis, [X[k]], axis=0) # label this cluster self.cluster_labels.set_selected(flex.size_t(cluster.tolist()), cluster_id) cluster_id += 1 if flex.max(self.cluster_labels) == 0: # assume single cluster return self.cluster_labels cluster_centroids = [] X = self.coords.as_numpy_array() for i in set(self.cluster_labels): sel = self.cluster_labels == i cluster_centroids.append(X[( self.cluster_labels == i).iselection().as_numpy_array()].mean( axis=0)) # hierarchical clustering of cluster centroids, using cosine metric dist_mat = ssd.pdist(cluster_centroids, metric='cosine') linkage_matrix = hierarchy.linkage(dist_mat, method='average') # compare valid equal-sized clustering using silhouette scores # https://en.wikipedia.org/wiki/Silhouette_(clustering) # http://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html distances = linkage_matrix[::, 2] distances = np.insert(distances, 0, 0) silhouette_scores = flex.double() thresholds = flex.double() n_clusters = flex.size_t() for threshold in distances[1:]: cluster_labels = self.cluster_labels.deep_copy() labels = hierarchy.fcluster(linkage_matrix, threshold - eps, criterion='distance').tolist() counts = [labels.count(l) for l in set(labels)] if len(set(counts)) > 1: # only equal-sized clusters are valid continue n = len(set(labels)) if n == 1: continue for i in range(len(labels)): cluster_labels.set_selected(self.cluster_labels == i, int(labels[i] - 1)) silhouette_avg = metrics.silhouette_score( X, cluster_labels.as_numpy_array(), metric='cosine') # Compute the silhouette scores for each sample sample_silhouette_values = metrics.silhouette_samples( X, cluster_labels.as_numpy_array(), metric='cosine') 
silhouette_avg = sample_silhouette_values.mean() silhouette_scores.append(silhouette_avg) thresholds.append(threshold) n_clusters.append(n) count_negative = (sample_silhouette_values < 0).sum() logger.info('Clustering:') logger.info(' Number of clusters: %i' % n) logger.info(' Threshold score: %.3f (%.1f deg)' % (threshold, math.degrees(math.acos(1 - threshold)))) logger.info(' Silhouette score: %.3f' % silhouette_avg) logger.info(' -ve silhouette scores: %.1f%%' % (100 * count_negative / sample_silhouette_values.size)) if self.params.save_plot: plot_silhouette(sample_silhouette_values, cluster_labels.as_numpy_array(), file_name='%ssilhouette_%i.png' % (self.params.plot_prefix, n)) if self.params.cluster.seed.n_clusters is Auto: idx = flex.max_index(silhouette_scores) else: idx = flex.first_index(n_clusters, self.params.cluster.seed.n_clusters) if idx is None: raise Sorry('No valid clustering with %i clusters' % self.params.cluster.seed.n_clusters) if (self.params.cluster.seed.n_clusters is Auto and silhouette_scores[idx] < self.params.cluster.seed.min_silhouette_score): # assume single cluster self.cluster_labels = flex.int(self.cluster_labels.size(), 0) else: threshold = thresholds[idx] - eps labels = hierarchy.fcluster(linkage_matrix, threshold, criterion='distance') cluster_labels = flex.double(self.cluster_labels.size(), -1) for i in range(len(labels)): cluster_labels.set_selected(self.cluster_labels == i, labels[i] - 1) self.cluster_labels = cluster_labels if self.params.save_plot: plot_matrix(1 - ssd.squareform(dist_mat), linkage_matrix, '%sseed_clustering_cos_angle_matrix.png' % self.params.plot_prefix, color_threshold=threshold) plot_dendrogram(linkage_matrix, '%sseed_clustering_cos_angle_dendrogram.png' % self.params.plot_prefix, color_threshold=threshold) return self.cluster_labels
def __init__(self, datasets, params): self.datasets = datasets self.params = params self.input_space_group = None for dataset in datasets: if self.input_space_group is None: self.input_space_group = dataset.space_group() else: assert dataset.space_group() == self.input_space_group if self.params.dimensions is Auto: dimensions = None else: dimensions = self.params.dimensions lattice_group = None if self.params.lattice_group is not None: lattice_group = self.params.lattice_group.group() self.target = target.Target( self.datasets, min_pairs=self.params.min_pairs, lattice_group=lattice_group, dimensions=dimensions, verbose=self.params.verbose, weights=self.params.weights, nproc=self.params.nproc, ) if self.params.dimensions is Auto: dimensions = [] functional = [] explained_variance = [] explained_variance_ratio = [] for dim in range(1, self.target.dim + 1): self.target.set_dimensions(dim) self.optimise() logger.info('Functional: %g' % self.minimizer.f) self.principal_component_analysis() dimensions.append(dim) functional.append(self.minimizer.f) explained_variance.append(self.explained_variance) explained_variance_ratio.append(self.explained_variance_ratio) # Find the elbow point of the curve, in the same manner as that used by # distl spotfinder for resolution method 1 (Zhang et al 2006). # See also dials/algorithms/spot_finding/per_image_analysis.py from scitbx import matrix x = flex.double(dimensions) y = flex.double(functional) slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1]) p_m = flex.min_index(slopes) x1 = matrix.col((x[p_m], y[p_m])) x2 = matrix.col((x[-1], y[-1])) gaps = flex.double() v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize() for i in range(p_m, len(x)): x0 = matrix.col((x[i], y[i])) r = x1 - x0 g = abs(v.dot(r)) gaps.append(g) p_k = flex.max_index(gaps) g_k = gaps[p_k] p_g = p_k x_g = x[p_g + p_m] y_g = y[p_g + p_m] logger.info('Best number of dimensions: %i' % x_g) self.target.set_dimensions(int(x_g)) if params.save_plot: from matplotlib import pyplot as plt fig = plt.figure(figsize=(10, 8)) plt.clf() plt.plot(dimensions, functional) plt.plot([x_g, x_g], plt.ylim()) plt.xlabel('Dimensions') plt.ylabel('Functional') plt.savefig('%sfunctional_vs_dimension.png' % params.plot_prefix) plt.clf() for dim, expl_var in zip(dimensions, explained_variance): plt.plot(range(1, dim + 1), expl_var, label='%s' % dim) plt.plot([x_g, x_g], plt.ylim()) plt.xlabel('Dimension') plt.ylabel('Explained variance') plt.savefig('%sexplained_variance_vs_dimension.png' % params.plot_prefix) plt.clf() for dim, expl_var_ratio in zip(dimensions, explained_variance_ratio): plt.plot(range(1, dim + 1), expl_var_ratio, label='%s' % dim) plt.plot([x_g, x_g], plt.ylim()) plt.xlabel('Dimension') plt.ylabel('Explained variance ratio') plt.savefig('%sexplained_variance_ratio_vs_dimension.png' % params.plot_prefix) plt.close(fig) self.optimise() self.principal_component_analysis() self.cosine_analysis() self.cluster_analysis() if self.params.save_plot: self.plot()
def event(self, evt, env): """The event() function is called for every L1Accept transition. For now, log error and set bogus value to allow stuff to continue -- must check for the bogosity later XXX The dead time of the detector complicates checking how often things are updated! Move this to the ring buffer? @param evt Event data object, a configure object @param env Environment object """ from pyana.event import Event from acqiris_ext import acqiris_integrate, apd_hitfind super(mod_ledge, self).event(evt, env) if evt.status() != Event.Normal: pass # XXX return -- Never skip because arrays will end up # different length, so ignore this? # Get the time of the event, in fractional seconds since the # epoch. This is needed for all subsequent history-keeping, and # is hence determined first. XXX Is history-keeping even # justified? time = cspad_tbx.evt_time(evt) if time is None: time = float("nan") else: time = time[0] + time[1] / 1e3 self._timestamp.append(time) # The repetition rate is currently just used for sanity checking. repetition_rate = cspad_tbx.evt_repetition_rate(evt) if repetition_rate is None: repetition_rate = float("nan") self._repetition_rate.append(repetition_rate) # Get the I0. No need to warn about it here, it will be done once # the image is written out. I0 = cspad_tbx.evt_pulse_energy(evt) if I0 is None: I0 = float("nan") self._I0.append(I0) # Get the FEE energy. Average the two readings before and after # attenuation separately. XXX What are the units? It look like # it could be mJ? fee_before = 0.5 * sum(evt.getFeeGasDet()[0:2]) if fee_before is None: fee_before = float("nan") self._fee_before.append(fee_before) fee_after = 0.5 * sum(evt.getFeeGasDet()[2:4]) if fee_after is None: fee_after = float("nan") self._fee_after.append(fee_after) # XXX Just a check: this is what xtcexplorer does: fee_energy = evt.get(xtc.TypeId.Type.Id_FEEGasDetEnergy) if fee_energy is not None: assert ( evt.getFeeGasDet()[0] == fee_energy.f_11_ENRC and evt.getFeeGasDet()[1] == fee_energy.f_12_ENRC and evt.getFeeGasDet()[2] == fee_energy.f_21_ENRC and evt.getFeeGasDet()[3] == fee_energy.f_22_ENRC ) """ # For Bill: expect 84240 data points for r0054 # # grep "^BILL_POINT" | cut -d' ' -f2,3,4,5,6 > t.dat # gnuplot> m=0.1 ; k=-0.01e-8; f(x) = k * x + m # gnuplot> fit f(x) "t.dat" using ($3):($5) via k,m if not hasattr(self, '_gmd_seqno'): self._gmd_seqno = 0 gmd = evt.get(key=xtc.TypeId.Type.Id_GMD) if gmd is None: return acq_apd = evt.getAcqValue('SxrEndstation-0|Acqiris-1', 0, env) if acq_apd is not None and acq_apd.waveform() is not None: w = acq_apd.waveform() baseline = numpy.mean(w[0:(w.shape[0] / 5)]) peak = numpy.min(w[(w.shape[0] / 5):w.shape[0]]) self._gmd_seqno += 1 print "BILL_POINT %d %s %s %s %s" % (self._gmd_seqno, repr(gmd.fBgValuePerSample), repr(gmd.fCorrectedSumPerPulse), repr(gmd.fRelativeEnergyPerPulse), repr(peak - baseline)) return """ """ # XXX Record injector motion--note that they cannot be added--see # Ray's email. 
injector_micos_xyz = cspad_tbx.env_pv3_get( env, ['SXR:EXP:MZM:%02d:ENCPOSITIONGET' % i for i in [1, 2, 3]]) if injector_micos_xyz is None: self.logger.error("No micos injector motor positions") injector_micos_xyz = (float('nan'), float('nan'), float('nan')) self._injector_micos_xyz.append(injector_micos_xyz) injector_rough_xyz = cspad_tbx.env_pv3_get( env, ['SXR:EXP:MMS:%02d.RBV' % i for i in [1, 2, 3]]) if injector_rough_xyz is None: self.logger.error("No rough injector motor positions") injector_rough_xyz = (float('nan'), float('nan'), float('nan')) self._injector_rough_xyz.append(injector_rough_xyz) # Injector power supplies XXX There is a third PSU, no? # # The -5kV supply # SXR:EXP:SHV:VHS6:CH0:VoltageMeasure # SXR:EXP:SHV:VHS6:CH0:CurrentMeasure # # The plus 5kV supply # SXR:EXP:SHV:VHS2:CH0:VoltageMeasure # SXR:EXP:SHV:VHS2:CH0:CurrentMeasure injector_plus_current = cspad_tbx.env_pv1_get( env, 'SXR:EXP:SHV:VHS6:CH0:CurrentMeasure') if injector_plus_current is None: self.logger.error("No plus-motor current") injector_plus_current = -1 self._injector_plus_current.append(injector_plus_current) injector_plus_voltage = cspad_tbx.env_pv1_get( env, 'SXR:EXP:SHV:VHS6:CH0:VoltageMeasure') if injector_plus_voltage is None: self.logger.error("No plus-motor voltage") injector_plus_voltage = -1 self._injector_plus_voltage.append(injector_plus_voltage) injector_minus_current = cspad_tbx.env_pv1_get( env, 'SXR:EXP:SHV:VHS2:CH0:CurrentMeasure') if injector_minus_current is None: self.logger.error("No minus-motor current") injector_minus_current = -1 self._injector_minus_current.append(injector_minus_current) injector_minus_voltage = cspad_tbx.env_pv1_get( env, 'SXR:EXP:SHV:VHS2:CH0:VoltageMeasure') if injector_minus_voltage is None: self.logger.error("No minus-motor voltage") injector_minus_voltage = -1 self._injector_minus_voltage.append(injector_minus_voltage) """ """ # The spectrometer motor positions are just used for sanity # checking. spectrometer_xyz = cspad_tbx.env_spectrometer_xyz_sxr(env) if spectrometer_xyz is None: self.logger.error("No spectrometer motor positions") spectrometer_xyz = (float('nan'), float('nan'), float('nan')) self._spectrometer_xyz.append(spectrometer_xyz) """ # Get the pulse energy after monochromator, and fall back on the # pre-monochromator energy if the former is absent. Record in # list for mean and stddev. XXX Verify that the wavelength after # the monochromator is updated at around 1 Hz. # # For the publication an offset and scale were calibrated. wavelength = cspad_tbx.env_wavelength_sxr(evt, env) if wavelength is None: wavelength = cspad_tbx.evt_wavelength(evt) if wavelength is None: energy = float("nan") else: energy = 12398.4187 / wavelength self._energy.append(energy) self._history_energy.push(time, energy) # XXX Not necessary?! """ # Laser shutters XXX need to sort out laser numbering XXX Laser # power stuff? XXX Position of polarizer/analyser shutters = cspad_tbx.env_laser_shutters(env) #print "Got shutters", shutters """ # Read out the diode traces from the via the Acqiris. XXX In any # case, the APD and the more sensitive Opto Diode in the monitor # tank (i.e. the transmission diode) should be anti-correlated, so # check it! The entire trace always covers 10 us. XXX Could this # be figured out from xtc.TypeId.Type.Id_AcqConfig? # # XXX This appears to be suboptimal: look at the # skewness-transform for the APD to sort this out. 
acq_apd = evt.getAcqValue("SxrEndstation-0|Acqiris-1", 0, env) acq_apd_integral = float("nan") if acq_apd is not None: waveform = acq_apd.waveform() if waveform is not None: # With a 40k-point trace, one should integrate from 18200 to # 18400. waveform = waveform.flatten() nmemb = len(waveform) // 200 if nmemb > 0: acq_apd_integral = acqiris_integrate(flex.double(waveform), 91 * nmemb, 100 * nmemb, nmemb) self._acq_apd_integral.append(acq_apd_integral) if evt.expNum() == 208: # Opto diode address for L632. acq_opto_diode = evt.getAcqValue("SxrEndstation-0|Acqiris-1", 1, env) elif evt.expNum() == 363: # Opto diode address for LB68. acq_opto_diode = evt.getAcqValue("SxrEndstation-0|Acqiris-2", 2, env) acq_opto_diode_integral = float("nan") if acq_opto_diode is not None: waveform = acq_opto_diode.waveform() if waveform is not None: # With a 40k-point trace, one should integrate from 16000 to # 24000. With a 20k-point trace, a suitable integration # region is bounded by 8000 and 12000. There is no need for # thresholding, because the integral of the Opto Diode will # not be used for hit finding. XXX What are the "misses" we # record on the Opto Diode? XXX The direct beam is completely # gone after it hits the sample, because soft X-rays. waveform = waveform.flatten() nmemb = len(waveform) // 5 if nmemb > 0: acq_opto_diode_integral = acqiris_integrate(flex.double(waveform), 2 * nmemb, 4 * nmemb, nmemb) self._acq_opto_diode_integral.append(acq_opto_diode_integral) # Sanity check: verify that the timestamps for the two Acqiris # traces are similar enough. if acq_apd is not None and acq_opto_diode is not None: assert ( len(acq_apd.timestamps()) == len(acq_opto_diode.timestamps()) and numpy.any(numpy.abs(acq_apd.timestamps() - acq_opto_diode.timestamps())) < 1e-6 ) # self.logger.info("DIODE INTEGRALS: %f %f %f" % (I0, acq_apd_integral, acq_opto_diode_integral)) """ import matplotlib.pyplot as plt hit_array_apd = apd_hitfind( flex.double(acq_apd.waveform()), len(acq_apd.waveform()) // 5) hit_array_opto_diode = apd_hitfind( flex.double(acq_opto_diode.waveform()), len(acq_opto_diode.waveform()) // 5) fig = plt.figure() ax = fig.add_subplot(111) #ax.plot( # range(len(acq_apd.timestamps())), acq_apd.waveform()) ax.plot( range(len(acq_opto_diode.timestamps())), acq_opto_diode.waveform()[0, :]) plt.show() fig = plt.figure() ax = fig.add_subplot(111) #ax.plot( # acq_apd.timestamps()[0:len(hit_array_apd)], hit_array) ax.plot( acq_opto_diode.timestamps()[0:len(hit_array_opto_diode)], hit_array) plt.show() """ # Determine whether the beam hit the sample, and register the # outcome. If not using any diodes for hit-finding, every shot is # assumed to be a hit. XXX Unfortunately, this crucial piece is # very unreliable. The threshold for the APD needs to be # verified--inspect all the histograms. XXX hitfind_flags is # probable better as a module parameter. # hitfind_flags = 0x3 hitfind_flags = 0 hit = False if not hitfind_flags: hit = True elif hitfind_flags & 0x1 and acq_apd_integral > 0.2: hit = True self._hit.append(hit) # Always proceed all the way through (even if some shots have # invalid values of e.g. I0) because images are precious. XXX # Must reset counters before returning! XXX What about skipping # all of the above if display is True? if self.cspad_img is not None: self._nframes += 1 """ # The spectrometer should not move! t = (self._spectrometer_xyz - self._spectrometer_xyz.mean()).rms_length() print "Spectrometer displacement", t # Fine/rough motor position deviations from the mean. 
See Ray's # email. t = (self._injector_micos_xyz - self._injector_micos_xyz.mean()).rms_length() print "Injector micos displacement", t t = (self._injector_rough_xyz - self._injector_rough_xyz.mean()).rms_length() print "Injector rough displacement", t # Injector motor position means and deviations if self._injector_plus_current.size() > 1: t = flex.mean_and_variance(self._injector_plus_current) print "Injector plus current mean %10e stddev %10e" % \ (t.mean(), t.unweighted_sample_standard_deviation()) if self._injector_plus_voltage.size() > 1: t = flex.mean_and_variance(self._injector_plus_voltage) print "Injector plus voltage mean %10e stddev %10e" % \ (t.mean(), t.unweighted_sample_standard_deviation()) if self._injector_minus_current.size() > 1: t = flex.mean_and_variance(self._injector_minus_current) print "Injector minus current mean %10e stddev %10e" % \ (t.mean(), t.unweighted_sample_standard_deviation()) if self._injector_minus_voltage.size() > 1: t = flex.mean_and_variance(self._injector_minus_voltage) print "Injector minus voltage mean %10e stddev %10e" % \ (t.mean(), t.unweighted_sample_standard_deviation()) """ # Energy statistics are collected from all shots, regardless of # whether they are hits or not. Since this statistic mentions # the frame number, it should be reported first. XXX The energy # should have a really small standard deviation. Check # self._energy.size() and self._history_energy.frequency() XXX # verify that it works for one data point. (energy_mean, energy_stddev, energy_nmemb, n) = self._filtered_stats( lambda x: not math.isnan(x) and x > 0, self._energy ) if n > 0: self.logger.warning("%d shots have undefined energy" % n) (I0_mean, I0_stddev, I0_nmemb, n) = self._filtered_stats(lambda x: not math.isnan(x), self._I0) if n > 0: self.logger.warning("%d shots have undefined I0" % n) self.logger.info( "Frame %d: E=%.3f+/-%.3f (N=%d) I0=%.0f+/-%.0f (N=%d)" % (self._nframes, energy_mean, energy_stddev, energy_nmemb, I0_mean, I0_stddev, I0_nmemb) ) # Sanity check: unless changed while integrating the frame, the # repetition rate should have a standard deviation of zero. dt = self._timestamp[-1] - self._timestamp[0] rr_mean = rr_observed = rr_stddev = 0 if dt > 0: rr_observed = (len(self._timestamp) - 1) / dt rr = filter(lambda x: not math.isnan(x) and x > 0, self._repetition_rate) if len(rr) > 1: rr_stats = flex.mean_and_variance(flex.double(rr)) rr_mean = rr_stats.mean() rr_stddev = rr_stats.unweighted_sample_standard_deviation() self.logger.info( "Repetition rate: %.3f Hz (observed), %.3f+/-%.3f Hz (expected)" % (rr_observed, rr_mean, rr_stddev) ) # Compare observed and configured exposure time. config = cspad_tbx.getConfig(self.address, env) exposure_time = 0 if config is not None and dt > 0 and len(self._timestamp) > 0: exposure_time = dt * (len(self._timestamp) + 1) / len(self._timestamp) self.logger.info( "Exposure time: %.3f s (observed), %.3f s (configured)" % (exposure_time, config.exposureTime()) ) # Compute the leading dead time, the time between starting the # readout of the previous frame and the arrival of the shot # immediately following it. This is an interesting statistic, # no matter what. XXX Maybe look at its distribution? 
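# Aside on the Acqiris sanity check above: if both .timestamps() calls
# return numpy arrays, the intended "every pairwise difference below
# tolerance" test is expressed most directly with numpy.allclose.  A
# minimal sketch, with an illustrative helper name:
"""
import numpy

def timestamps_match(t_apd, t_opto, tol=1e-6):
  # True when both traces have the same length and every pair of
  # timestamps agrees to within tol seconds.
  t_apd = numpy.asarray(t_apd)
  t_opto = numpy.asarray(t_opto)
  return t_apd.shape == t_opto.shape and \
         numpy.allclose(t_apd, t_opto, rtol=0, atol=tol)
"""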
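# A small numeric sketch of the dead-time arithmetic that follows,
# using made-up timestamps (fractional seconds since the epoch, as
# above):
"""
timestamps = [100.00, 100.10, 100.20, 100.30]  # hypothetical values
dt = timestamps[-1] - timestamps[0]            # 0.3 s spanned
rr_observed = (len(timestamps) - 1) / dt       # 10 Hz observed
previous_readout_time = 99.85                  # hypothetical
dead_time = timestamps[0] - previous_readout_time - 1.0 / rr_observed
# 100.00 - 99.85 - 0.10, i.e. roughly 0.05 s of leading dead time
"""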
dead_time = 0 if rr_observed > 0 and hasattr(self, "_previous_readout_time"): dead_time = self._timestamp[0] - self._previous_readout_time - 1 / rr_observed if math.isnan(dead_time): dead_time = 0 self.logger.info("Dead time: %.3f s" % dead_time) self._previous_readout_time = self._timestamp[-1] assert time == self._timestamp[-1] # XXX ZAP once one run survives it! # Flag blank images (i.e. images that had no hits), because # these may interesting for background subtraction. hits = self._hit.count(True) self.logger.info("Hit rate: %d/%d (%.2f%%)" % (hits, self._hit.size(), 100 * hits / self._hit.size())) if hits == 0: self.logger.info("Frame %d is blank" % self._nframes) # Get the normalisation factor by summing up I0 for all hits. # Invalid and non-positive values of I0 are treated as zeroes. # XXX Make this kind of summing a function of its own. I0 = sum(filter(lambda x: not math.isnan(x) and x > 0, self._I0.select(self._hit))) I0_all = sum(filter(lambda x: not math.isnan(x) and x > 0, self._I0)) fee_before_all = sum(filter(lambda x: not math.isnan(x) and x > 0, self._fee_before)) fee_after_all = sum(filter(lambda x: not math.isnan(x) and x > 0, self._fee_after)) # Register the template to the image and locate the regions of # interest based on the registration parameters. XXX Should # also give contrast: fit 2D-Gaussian to peak and report its # standard deviations and fit? if self._template is not None: gamma = lewis(self._template, self.cspad_img) p = flex.max_index(gamma) peak = ( p // gamma.focus()[1] - self._template.focus()[0] + 1, p % gamma.focus()[1] - self._template.focus()[1] + 1, ) # """ ### REFERENCE CHECK ### from os.path import dirname, isdir, join from scipy import io mat_dirname = dirname(cspad_tbx.pathsubst(self._mat_path, evt, env, frame_number=self._nframes)) if not isdir(mat_dirname): makedirs(mat_dirname) io.savemat( file_name=join(mat_dirname, "cross-check-%05d.mat" % self._nframes), mdict=dict( image=self.cspad_img.as_numpy_array(), template=self._template.as_numpy_array(), gamma=gamma.as_numpy_array(), peak=numpy.array(peak), ), appendmat=False, do_compression=True, oned_as="column", ) return ### REFERENCE CHECK ### # """ else: # Alternative: position everything with respect to the frame # origin. peak = (0, 0) # XXX Come up with a better way to handle the offsets! They # really do depend on the template, and should therefore be # packaged with it. self.logger.info("Template registration anchor point (%d, %d)" % (peak[0], peak[1])) roi = [] if evt.expNum() == 208: # Regions of interest for L632 (experiment number 208). XXX # Could perhaps migrate the template matching here instead? # The left, middle, and right manganese signals. XXX Extend the # rightmost ROI three pixels in upward direction (see runs 145 # and onwards, also note narrower slit)? roi.append((peak[0] + 59, peak[1] - 24, 12, 5)) roi.append((peak[0] + 61, peak[1] + 28, 12, 4)) roi.append((peak[0] + 61, peak[1] + 79, 12, 5)) # Two background regions between the manganese spots, with the # same total area as the signal. roi.append((peak[0] + 62, peak[1] + 1, 8, 8)) roi.append((peak[0] + 63, peak[1] + 51, 8, 8)) # The left and right direct reflections from the Si substrate # (i.e. the areas between the zone plates). These were the # features used for template registration. roi.append((peak[0], peak[1], 40, 10)) roi.append((peak[0], peak[1] + 50, 40, 9)) # Spot between the direct reflections. XXX What is this? 
roi.append((peak[0] + 1, peak[1] + 23, 22, 13)) # The horizontal slit, where the direct reflection occurs. This # is fixed. XXX Verify this! roi.append((22, 0, 41, 128)) # Background stripe, below the manganese spots. This is fixed # to the bottom of the detector. roi.append((104, 0, 20, 128)) elif evt.expNum() == 363: # Regions of interest for LB68 (experiment number 363). # 0-pixel are active, 255-pixel are inactive from scipy.misc import imread # Dec 5, 2013 (09:00 - 21:00): initial estimates from r0010 """ roi.append((peak[0] + 14, peak[1] + 138 + 23, 25, 50 - 25)) roi.append((peak[0] + 45, peak[1] + 138 + 23, 25, 50 - 25)) roi.append((peak[0] + 78, peak[1] + 137 + 23, 25, 50 - 25)) roi.append((peak[0] + 111, peak[1] + 137 + 23, 25, 50 - 25)) roi.append((peak[0] + 144, peak[1] + 137 + 23, 25, 50 - 25)) roi.append((peak[0] + 177, peak[1] + 136 + 23, 25, 50 - 25)) roi.append((peak[0] + 210, peak[1] + 136 + 23, 25, 50 - 25)) roi.append((peak[0] + 243, peak[1] + 136 + 23, 25, 50 - 25)) roi.append((peak[0] + 278, peak[1] + 135 + 23, 25, 50 - 25)) roi.append((peak[0] + 312, peak[1] + 135 + 23, 25, 50 - 25)) roi.append((peak[0] + 344, peak[1] + 135 + 23, 25, 50 - 25)) roi.append((peak[0] + 376, peak[1] + 135 + 23, 25, 50 - 25)) roi.append((peak[0] + 408, peak[1] + 135 + 23, 25, 50 - 25)) roi.append((peak[0] + 442, peak[1] + 135 + 23, 25, 50 - 25)) roi.append((peak[0] + 475, peak[1] + 135 + 23, 25, 50 - 25)) """ # Dec 6, 2013 (09:00 - 21:00): rough estimates """ roi.append((peak[0] + 0, peak[1] + 25, 512, 25)) # bkg roi.append((peak[0] + 0, peak[1] + 135, 512, 25)) # oxygen roi.append((peak[0] + 0, peak[1] + 160, 512, 25)) # signal roi.append((peak[0] + 0, peak[1] + 300, 512, 130)) # zeroth order """ # Dec 7, 2013 (09:00 - 21:00): overlap between oxygen and # signal. Will loose some signal. """ roi.append((peak[0] + 0, peak[1] + 25, 512, 25)) # bkg roi.append((peak[0] + 0, peak[1] + 135, 512, 50)) # oxygen roi.append((peak[0] + 0, peak[1] + 185, 512, 40)) # signal roi.append((peak[0] + 0, peak[1] + 270, 512, 170)) # zeroth order """ """ # Dec 7 2013 (09:00 - 21:00): binary masks stored in PNG # images. roi.append((peak[0] + 0, peak[1] + 25, 512, 25)) # bkg roi.append((peak[0] + 0, peak[1] + 135, 512, 25)) # oxygen #roi_image = flex.float( # imread('/reg/neh/home1/hattne/myrelease/LB68-r0039-max-mask.png', # flatten=True)) #roi_image = flex.float( # imread('/reg/neh/home1/hattne/myrelease/LB68-r0039-std-mask.png', # flatten=True)) roi_image = flex.float( imread('/reg/neh/home1/hattne/myrelease/LB68-r0052-avg-mask.png', flatten=True)) roi_image = (255 - roi_image) #roi.append((0, 0, self.cspad_img.focus()[0], self.cspad_img.focus()[1])) roi.append(roi_image) roi.append((peak[0] + 0, peak[1] + 270, 512, 170)) # zeroth order """ # Dec 9, 2013 (09:00 - 21:00) # """ roi.append((peak[0] + 0, peak[1] + 25, 512, 25)) # bkg roi.append((peak[0] + 0, peak[1] + 135, 512, 25)) # oxygen # roi.append((peak[0] + 0, peak[1] + 160, 512, 25)) # signal roi_image = flex.float(imread("/reg/neh/home1/hattne/myrelease/LB68-r0067-max-mask.png", flatten=True)) roi.append(roi_image) roi.append((peak[0] + 0, peak[1] + 240, 512, 180)) # zeroth order # """ else: self.logger.error( "No regions of interest for %s (experiment number %d)" % (env.experiment(), evt.expNum()) ) # Clip the regions of interest to the actual image. If the ROI # does not overlap with the image at all, set its width and # height to zero. XXX Do the integration here as well? 
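# Aside on the template-registration step above: the flat index from
# flex.max_index is unpacked into slow/fast coordinates by dividing by,
# and taking the remainder against, the fast dimension; divmod expresses
# the same conversion in one call.  A sketch with an illustrative helper
# name, assuming a row-major correlation map:
"""
def unravel_peak(p, n_fast, template_focus):
  # Slow (row) and fast (column) coordinates of flat index p, shifted
  # by the template size exactly as in the registration code above.
  i_slow, i_fast = divmod(p, n_fast)
  return (i_slow - template_focus[0] + 1,
          i_fast - template_focus[1] + 1)
"""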
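# The clipping loop that follows trims each rectangular ROI to the
# image; the same logic can also be written as a single helper.  A
# minimal sketch, assuming (slow_start, fast_start, slow_size,
# fast_size) rectangles as used above:
"""
def clip_roi(roi, n_slow, n_fast):
  # Clip a rectangle to an n_slow x n_fast image; a rectangle with no
  # overlap at all collapses to zero width and height.
  s, f, ns, nf = roi
  s0, f0 = max(s, 0), max(f, 0)
  s1, f1 = min(s + ns, n_slow), min(f + nf, n_fast)
  return (s0, f0, max(s1 - s0, 0), max(f1 - f0, 0))
"""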
for i in range(len(roi)): if not isinstance(roi[i], tuple): continue r = roi[i] if ( r[0] + r[2] < 0 or r[0] >= self.cspad_img.focus()[0] or r[1] + r[3] < 0 or r[1] >= self.cspad_img.focus()[1] ): roi[i] = (r[0], r[1], 0, 0) continue r = roi[i] if r[0] < 0: roi[i] = (0, r[1], r[2] + r[0], r[3]) r = roi[i] if r[1] < 0: roi[i] = (r[0], 0, r[2], r[3] + r[1]) r = roi[i] if r[0] + r[2] > self.cspad_img.focus()[0]: roi[i] = (r[0], r[1], self.cspad_img.focus()[0] - r[0], r[3]) r = roi[i] if r[1] + r[3] > self.cspad_img.focus()[1]: roi[i] = (r[0], r[1], r[2], self.cspad_img.focus()[1] - r[1]) # Sum up intensities in all regions of interest, and keep track # of the actual number of pixels summed. The common_mode module # takes care of dark-subtraction. XXX Would like to estimate # sigma for spot, like in spotfinder/LABELIT. I = flex.double(len(roi)) I_nmemb = flex.int(len(roi)) for i in range(len(roi)): if isinstance(roi[i], flex.float): sel = roi[i].as_1d() < 128 I[i] = flex.sum(self.cspad_img.as_1d().select(sel)) I_nmemb[i] = sel.count(True) continue if roi[i][2] <= 0 or roi[i][3] <= 0: I[i] = 0 I_nmemb[i] = 0 else: I[i] = flex.sum( self.cspad_img.matrix_copy_block( i_row=roi[i][0], i_column=roi[i][1], n_rows=roi[i][2], n_columns=roi[i][3] ) ) I_nmemb[i] = roi[i][2] * roi[i][3] """ # Sanity check: white out the region of interest. self.cspad_img.matrix_paste_block_in_place( block=flex.double(flex.grid(roi[i][2], roi[i][3])), i_row=roi[i][0], i_column=roi[i][1]) """ acq_apd_sum = sum(filter(lambda x: not math.isnan(x) and x > 0, self._acq_apd_integral.select(self._hit))) acq_opto_diode_sum = sum( filter(lambda x: not math.isnan(x) and x > 0, self._acq_opto_diode_integral.select(self._hit)) ) acq_apd_sum_all = sum(filter(lambda x: not math.isnan(x) and x > 0, self._acq_apd_integral)) acq_opto_diode_sum_all = sum(filter(lambda x: not math.isnan(x) and x > 0, self._acq_opto_diode_integral)) # Append the data point to the stream: shots, hits, energy, and # I. XXX OrderedDict requires Python 2.7, could fall back on # regular Dict at the price of non-deterministic column order. from collections import OrderedDict csv_dict = OrderedDict( [ ("n_frames", self._hit.size()), ("n_hits", hits), ("I0", I0), ("I0_all", I0_all), ("fee_before_all", fee_before_all), ("fee_after_all", fee_after_all), ("energy_mean", energy_mean), ("acq_apd_sum", acq_apd_sum), ("acq_apd_sum_all", acq_apd_sum_all), ("acq_opto_diode_sum", acq_opto_diode_sum), ("acq_opto_diode_sum_all", acq_opto_diode_sum_all), ] ) for (i, item) in enumerate(zip(roi, I, I_nmemb)): key = "roi_" + ("bkg", "oxygen", "manganese", "zeroth_order")[i] csv_dict["%s_nmemb" % key] = item[2] if isinstance(item[0], tuple): csv_dict["%s_ss_start" % key] = item[0][0] csv_dict["%s_fs_start" % key] = item[0][1] csv_dict["%s_ss_size" % key] = item[0][2] csv_dict["%s_fs_size" % key] = item[0][3] else: csv_dict["%s_ss_start" % key] = 0 csv_dict["%s_fs_start" % key] = 0 csv_dict["%s_ss_size" % key] = item[0].focus()[0] csv_dict["%s_fs_size" % key] = item[0].focus()[1] csv_dict["%s_I" % key] = item[1] # XXX assert that keys match up with what's in the file already? # Or exploit the error-reporting mechanism already implemented? # Write the header. XXX How to control the order of the # columns? 
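# On column order: csv.DictWriter emits columns in exactly the order of
# the fieldnames sequence it is given, so building csv_dict as an
# OrderedDict and passing csv_dict.keys() (as done below) already pins
# the layout; writeheader() (Python 2.7+) is the stock way to emit the
# header row once.  A self-contained sketch:
"""
import csv
import sys
from collections import OrderedDict

row = OrderedDict([("n_frames", 1), ("n_hits", 0), ("I0", 0.0)])
writer = csv.DictWriter(sys.stdout, fieldnames=list(row.keys()))
writer.writeheader()   # header row, written once
writer.writerow(row)   # data row in the same column order
"""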
if not hasattr(self, "_csv"): from csv import DictWriter self._csv = DictWriter(self._stream_table, csv_dict.keys()) self._csv.writerow({key: key for key in csv_dict.keys()}) self._csv.writerow(csv_dict) # Output the non-normalised image and all other relevant data to # a binary MATLAB file. XXX What if scipy is not available? from os import makedirs, path from scipy import io mat_path = cspad_tbx.pathsubst(self._mat_path, evt, env, frame_number=self._nframes) if not path.isdir(path.dirname(mat_path)): makedirs(path.dirname(mat_path)) io.savemat( file_name=mat_path, mdict=dict( DATA=self.cspad_img.as_numpy_array(), DIODES=numpy.array((acq_apd_sum, acq_apd_sum_all, acq_opto_diode_sum, acq_opto_diode_sum_all)), ENERGY=energy_mean, HITS=numpy.array((hits, self._hit.size())), I0=numpy.array((I0, I0_all)), INTENSITIES=numpy.array(I), ROIS=numpy.array([r for r in roi if isinstance(r, tuple)]), ), appendmat=False, do_compression=True, oned_as="column", ) # Optionally update the image in the viewer. See mod_view. if self._display: from time import localtime, strftime # Copy over regions of interest to shared multiprocessing # array. XXX Flip to honour wxPython convention. for i in range(len(roi)): if not isinstance(roi[i], tuple): continue self._roi[4 * i + 0] = roi[i][1] self._roi[4 * i + 1] = roi[i][0] self._roi[4 * i + 2] = roi[i][3] self._roi[4 * i + 3] = roi[i][2] time_str = strftime("%H:%M:%S", localtime(evt.getTime().seconds())) title = "r%04d@%s: frame %d on %s" % (evt.run(), time_str, self._nframes, self.address) # XXX No distance in the Andor experiment. So don't bother # with the fictional beam center, distance, and saturation # value? See also mod_average.endjob() img_obj = ( dict( BEAM_CENTER=(0, 0), DATA=self.cspad_img, DETECTOR_ADDRESS=self.address, DISTANCE=10, # XXX Evil kludge to keep dxtbx happy! PIXEL_SIZE=13.5e-3, # XXX Hard-coded, again! SATURATED_VALUE=10000, TIME_TUPLE=cspad_tbx.evt_time(evt), WAVELENGTH=12398.4187 / energy, ), title, ) while not self._queue.empty(): if not self._proc.is_alive(): evt.setStatus(Event.Stop) return while True: try: self._queue.put(img_obj, timeout=1) break except Exception: # Queue.Full: pass self._reset_counters() return
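# The hand-off to the viewer above retries a bounded put with a short
# timeout so the producer notices a dead consumer instead of blocking
# forever.  A standalone sketch of that pattern; the names are
# illustrative, and proc is assumed to be a multiprocessing.Process:
"""
import Queue  # the module is named 'queue' on Python 3

def send_to_viewer(q, proc, payload):
  while True:
    if not proc.is_alive():
      return False     # consumer gone; drop this frame
    try:
      q.put(payload, timeout=1)
      return True
    except Queue.Full:
      pass             # queue still full; re-check the consumer
"""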