def _round_of_outlier_rejection(self):
    """
    Calculate normal deviations from the data in the Ih_table.

    For each reflection belonging to a symmetry group with more than two
    members, a normalised deviation from the scaled group mean of the
    *other* group members is computed; values are compared against
    self._zmax by determine_outlier_indices.

    Returns:
        (tuple): tuple containing:
            outlier_indices: A flex.size_t array of outlier indices w.r.t
                the current Ih_table
            other_potential_outliers: A flex.size_t array of indices from
                the symmetry groups where outliers were found, excluding the
                indices of the outliers themselves (indices w.r.t current
                Ih_table).
    """
    Ih_table = self._Ih_table_block
    I = Ih_table.intensities
    g = Ih_table.inverse_scale_factors
    w = Ih_table.weights
    # Multiplying by h_index_matrix sums each quantity within a symmetry
    # group; h_expand_matrix maps the group sums back to one value per
    # reflection (presumably; matrices are defined on the Ih_table block).
    wgIsum = (
        (w * g * I) * Ih_table.h_index_matrix) * Ih_table.h_expand_matrix
    wg2sum = (
        (w * g * g) * Ih_table.h_index_matrix) * Ih_table.h_expand_matrix
    # Subtract each reflection's own contribution so the sums describe
    # "all other members of my group".
    wgIsum_others = wgIsum - (w * g * I)
    wg2sum_others = wg2sum - (w * g * g)
    # Now do the rejection analysis if n_in_group > 2
    nh = Ih_table.calc_nh()
    sel = nh > 2
    wg2sum_others_sel = wg2sum_others.select(sel)
    wgIsum_others_sel = wgIsum_others.select(sel)
    # guard against zero division errors - can happen due to rounding errors
    # or bad data giving g values that are very small
    zero_sel = wg2sum_others_sel == 0.0
    # set as one for now, then mark as outlier below. This will only affect if
    # g is near zero, if w is zero then throw an assertionerror.
    wg2sum_others_sel.set_selected(zero_sel, 1.0)
    g_sel = g.select(sel)
    I_sel = I.select(sel)
    w_sel = w.select(sel)
    assert w_sel.all_gt(0)  # guard against division by zero
    # Normalised deviation of each reflection from the weighted mean of
    # the other members of its group.
    norm_dev = (I_sel - (g_sel * wgIsum_others_sel / wg2sum_others_sel)) / ((
        (1.0 / w_sel) + (g_sel**2 / wg2sum_others_sel))**0.5)
    norm_dev.set_selected(zero_sel, 1000)  # to trigger rejection
    z_score = flex.abs(norm_dev)
    # Want an array same size as Ih table; entries for groups of <= 2
    # members stay at zero and can never be flagged.
    all_z_scores = flex.double(Ih_table.size, 0.0)
    all_z_scores.set_selected(sel.iselection(), z_score)
    outlier_indices, other_potential_outliers = determine_outlier_indices(
        Ih_table.h_index_matrix, all_z_scores, self._zmax)
    return outlier_indices, other_potential_outliers
def _round_of_outlier_rejection(self):
    """
    Calculate normal deviations from the data in the Ih_table.

    One pass of outlier rejection: reflections whose normalised deviation
    exceeds self._zmax are recorded (their loc_indices / dataset_id are
    appended to self._outlier_indices / self._datasets), and the working
    Ih_table block and weights are then reduced to the remaining potential
    outliers, ready for another round.
    """
    Ih_table = self._Ih_table_block
    intensity = Ih_table.intensities
    g = Ih_table.inverse_scale_factors
    w = self.weights
    # Multiplying by h_index_matrix sums within each symmetry group;
    # h_expand_matrix maps the group sums back to per-reflection values
    # (presumably; matrices are defined on the Ih_table block).
    wgIsum = (
        (w * g * intensity) * Ih_table.h_index_matrix) * Ih_table.h_expand_matrix
    wg2sum = (
        (w * g * g) * Ih_table.h_index_matrix) * Ih_table.h_expand_matrix
    # Subtract each reflection's own contribution so the sums describe
    # "all other members of my group".
    wgIsum_others = wgIsum - (w * g * intensity)
    wg2sum_others = wg2sum - (w * g * g)
    # Now do the rejection analysis if n_in_group > 2
    nh = Ih_table.calc_nh()
    sel = nh > 2
    wg2sum_others_sel = wg2sum_others.select(sel)
    wgIsum_others_sel = wgIsum_others.select(sel)
    # guard against zero division errors - can happen due to rounding errors
    # or bad data giving g values that are very small
    zero_sel = wg2sum_others_sel == 0.0
    # set as one for now, then mark as outlier below. This will only affect if
    # g is near zero, if w is zero then throw an assertionerror.
    wg2sum_others_sel.set_selected(zero_sel, 1.0)
    g_sel = g.select(sel)
    I_sel = intensity.select(sel)
    w_sel = w.select(sel)
    assert w_sel.all_gt(0)  # guard against division by zero
    # Normalised deviation of each reflection from the weighted mean of
    # the other members of its group.
    norm_dev = (I_sel - (g_sel * wgIsum_others_sel / wg2sum_others_sel)) / (
        flex.sqrt((1.0 / w_sel) + (flex.pow2(g_sel) / wg2sum_others_sel)))
    norm_dev.set_selected(zero_sel, 1000)  # to trigger rejection
    z_score = flex.abs(norm_dev)
    # Want an array same size as Ih table; entries for groups of <= 2
    # members stay at zero and can never be flagged.
    all_z_scores = flex.double(Ih_table.size, 0.0)
    all_z_scores.set_selected(sel.iselection(), z_score)
    outlier_indices, other_potential_outliers = determine_outlier_indices(
        Ih_table.h_index_matrix, all_z_scores, self._zmax)
    # Record the identified outliers in terms of their original dataset
    # positions before shrinking the working table.
    self._outlier_indices.extend(
        self._Ih_table_block.Ih_table["loc_indices"].select(
            outlier_indices))
    self._datasets.extend(
        self._Ih_table_block.Ih_table["dataset_id"].select(
            outlier_indices))
    # Keep only the other potential outliers (and their weights, in
    # lockstep) for the next round of rejection.
    sel = flex.bool(Ih_table.size, False)
    sel.set_selected(other_potential_outliers, True)
    self._Ih_table_block = self._Ih_table_block.select(sel)
    self.weights = self.weights.select(sel)
def _round_of_outlier_rejection(self):
    """
    Calculate normal deviations from the data in the Ih_table.

    One pass of outlier rejection: for each reflection in a symmetry group
    of more than two members, compute its normalised deviation from the
    weighted contribution of the *other* group members. Reflections flagged
    by determine_outlier_indices (|z| > self._zmax) have their loc_indices
    and dataset_id appended to self._outlier_indices and self._datasets,
    then the working Ih_table block and weights are reduced to the
    remaining potential outliers for the next round.
    """
    block = self._Ih_table_block
    I = block.intensities
    scales = block.inverse_scale_factors
    weights = self.weights
    wgI = weights * scales * I
    wg2 = weights * scales * scales
    # Per-reflection group sums, minus each reflection's own contribution,
    # give "all the other members of my group" sums.
    wgI_others = block.sum_in_groups(wgI, output="per_refl") - wgI
    wg2_others = block.sum_in_groups(wg2, output="per_refl") - wg2
    # Only analyse reflections in groups with more than two members.
    in_large_group = block.calc_nh() > 2
    wg2_rest = wg2_others[in_large_group]
    wgI_rest = wgI_others[in_large_group]
    # Guard against zero-division errors - can happen due to rounding
    # errors or bad data giving very small scale values.
    zero_div = wg2_rest == 0.0
    # Placeholder value; these entries are forced to outliers below. This
    # only matters when the scale is near zero; a zero weight instead
    # trips the assertion.
    wg2_rest[zero_div] = 1.0
    g_rest = scales[in_large_group]
    I_rest = I[in_large_group]
    w_rest = weights[in_large_group]
    assert np.all(w_rest > 0)  # guard against division by zero
    # Normalised deviation from the weighted mean of the other members.
    expected = g_rest * wgI_rest / wg2_rest
    sigma = np.sqrt((1.0 / w_rest) + (np.square(g_rest) / wg2_rest))
    norm_dev = (I_rest - expected) / sigma
    norm_dev[zero_div] = 1000  # to trigger rejection
    # Scatter |z| back into an array covering the whole Ih table; entries
    # for small groups stay at zero and can never be flagged.
    all_z_scores = np.zeros(block.size)
    all_z_scores[in_large_group] = np.abs(norm_dev)
    outlier_indices, other_potential_outliers = determine_outlier_indices(
        block.h_index_matrix, flumpy.from_numpy(all_z_scores), self._zmax)
    # Record the identified outliers in terms of their original dataset
    # positions before shrinking the working table.
    outlier_mask = np.full(block.size, False, dtype=bool)
    outlier_mask[flumpy.to_numpy(outlier_indices)] = True
    self._outlier_indices = np.concatenate([
        self._outlier_indices,
        block.Ih_table["loc_indices"].iloc[outlier_mask].to_numpy(),
    ])
    self._datasets = np.concatenate([
        self._datasets,
        block.Ih_table["dataset_id"].iloc[outlier_mask].to_numpy(),
    ])
    # Keep only the other potential outliers (and their weights, in
    # lockstep) for the next round of rejection.
    keep = np.full(block.size, False, dtype=bool)
    keep[flumpy.to_numpy(other_potential_outliers)] = True
    self._Ih_table_block = block.select(keep)
    self.weights = weights[keep]