def get_symop_correlation_coefficients(miller_array, use_binning=False):
    """Correlate a miller array with itself under each symmetry operation.

    For every symmetry operation of the array's space group, the data are
    reindexed by that operation and correlated against the original data on
    the set of reflections the two arrays have in common.

    Args:
        miller_array: cctbx miller array of intensities; must have a binner
            set up by the caller when use_binning is True.
        use_binning (bool): if True, compute the correlation per resolution
            bin and report the reflection-count-weighted mean over bins;
            otherwise correlate all reflections at once.

    Returns:
        tuple: (corr_coeffs, n_refs) — a flex.double with one correlation
        coefficient per symmetry operation, and a flex.int with the number
        of common reflections used for each.
    """
    # NOTE(review): the original imported deepcopy and cctbx.miller without
    # using them, and used sgtbx without a local import; fixed here.
    from scitbx.array_family import flex
    from cctbx import sgtbx
    corr_coeffs = flex.double()
    n_refs = flex.int()
    space_group = miller_array.space_group()
    for smx in space_group.smx():
        reindexed_array = miller_array.change_basis(
            sgtbx.change_of_basis_op(smx))
        # Restrict both arrays to the reflections they have in common.
        intensity, intensity_rdx = reindexed_array.common_sets(miller_array)
        if use_binning:
            intensity.use_binning_of(miller_array)
            intensity_rdx.use_binning_of(miller_array)
            cc = intensity.correlation(intensity_rdx, use_binning=use_binning)
            # Bins with too few reflections yield None; average the valid
            # bins, weighting each bin's CC by its reflection count.
            corr_coeffs.append(
                flex.mean_weighted(
                    flex.double(i for i in cc.data if i is not None),
                    flex.double(j for i, j in zip(cc.data, cc.binner.counts())
                                if i is not None)))
        else:
            corr_coeffs.append(
                intensity.correlation(
                    intensity_rdx, use_binning=use_binning).coefficient())
        n_refs.append(intensity.size())
    return corr_coeffs, n_refs
def get_symop_correlation_coefficients(miller_array, use_binning=False):
    """Correlate *miller_array* with itself under each symmetry operation.

    Each symmetry operation of the array's space group is applied as a
    change-of-basis; the reindexed data are then correlated against the
    original data over their common reflection set.

    Args:
        miller_array: cctbx miller array of intensities; a binner must be
            present on it when use_binning is True.
        use_binning (bool): per-bin correlation averaged with reflection
            counts as weights, versus a single overall correlation.

    Returns:
        tuple: (corr_coeffs, n_refs) — flex.double of one CC per symmetry
        operation, and flex.int of common-reflection counts.
    """
    # NOTE(review): dropped the unused deepcopy/cctbx.miller imports and
    # added the sgtbx import that the body actually requires.
    from scitbx.array_family import flex
    from cctbx import sgtbx
    corr_coeffs = flex.double()
    n_refs = flex.int()
    space_group = miller_array.space_group()
    for smx in space_group.smx():
        reindexed_array = miller_array.change_basis(sgtbx.change_of_basis_op(smx))
        # Work only on the reflections present in both arrays.
        intensity, intensity_rdx = reindexed_array.common_sets(miller_array)
        if use_binning:
            intensity.use_binning_of(miller_array)
            intensity_rdx.use_binning_of(miller_array)
            cc = intensity.correlation(intensity_rdx, use_binning=use_binning)
            # Skip bins whose CC is None (too few reflections); weight the
            # remaining bins by their reflection counts.
            corr_coeffs.append(
                flex.mean_weighted(
                    flex.double(i for i in cc.data if i is not None),
                    flex.double(j for i, j in zip(cc.data, cc.binner.counts())
                                if i is not None),
                )
            )
        else:
            corr_coeffs.append(
                intensity.correlation(intensity_rdx,
                                      use_binning=use_binning).coefficient())
        n_refs.append(intensity.size())
    return corr_coeffs, n_refs
def calculate_residue_mean_normalised_b_factors(self):
    """Extract Mean-B values in each of the structures.

    For every structure, B-factors are first normalised to z-scores; then,
    per residue conformer, the occupancy-weighted mean of those z-scores is
    recorded for three atom subsets: all non-hydrogen atoms, backbone atoms,
    and sidechain atoms.
    """
    # The same per-residue calculation is applied to three atom selections;
    # pair each selection function with its output column instead of
    # repeating the loop body three times.
    selections = (
        (s_select.non_h, 'mean-bz-all'),
        (s_select.backbone, 'mean-bz-backbone'),
        (s_select.sidechains, 'mean-bz-sidechain'),
    )
    # Initialise the output columns to NaN so that residues absent from a
    # structure remain NaN rather than stale values.
    for _, column in selections:
        self.tables.residue_observations.loc[:, :, column] = numpy.nan
    print('------------------------------------>')
    for lab_h, pdb_h in zip(self.structures.labels, self.structures.hierarchies):
        print('Calculating Local Normalised Mean B-Factors: {}'.format(lab_h))
        # Normalise the b-factors of the structure
        pdb_h_z = normalise_b_factors_to_z_scores(pdb_hierarchy=pdb_h, method='protein')
        cache = pdb_h_z.atom_selection_cache()
        for select_func, column in selections:
            for c in conformers_via_residue_groups(
                    select_func(hierarchy=pdb_h_z, cache=cache)):
                res_lab = make_label(c)
                # Occupancy-weighted mean of the normalised b-factors.
                res_mean_b = flex.mean_weighted(
                    c.atoms().extract_b(), c.atoms().extract_occ())
                self.tables.residue_observations.set_value(
                    res_lab, lab_h, column, res_mean_b)
def __init__(self, i_obs, crystal_symmetry=None, d_min=None, d_max=None,
             anomalous=False, n_bins=10, binning_method='volume',
             debug=False, file_name=None, model_arrays=None,
             sigma_filtering=Auto, use_internal_variance=True,
             eliminate_sys_absent=True, d_min_tolerance=1.e-6,
             extend_d_max_min=False, cc_one_half_significance_level=None,
             cc_one_half_method='half_dataset',
             assert_is_not_unique_set_under_symmetry=True, log=None):
    """Compute overall and per-resolution-bin intensity merging statistics.

    Filters and bins the unmerged intensity array *i_obs*, computes overall
    merging statistics plus one `merging_stats` object per resolution bin,
    and assembles a plottable table of the results.

    :param i_obs: unmerged intensity miller array; must carry sigmas
    :param crystal_symmetry: overrides the symmetry on i_obs when given
    :param d_min, d_max: optional resolution cutoffs (loosened by
        d_min_tolerance before filtering)
    :param n_bins: number of resolution bins
    :param binning_method: 'volume' or 'counting_sorted' binner setup
    :param sigma_filtering: convention for sigma-based filtering (Auto
        resolves via get_filtering_convention)
    :param cc_one_half_method: 'half_dataset' or sigma-tau CC1/2 variant
    :raises Sorry: if the data are already merged, or no reflections
        survive the resolution cutoffs
    """
    self.file_name = file_name
    if (log is None):
        log = null_out()
    # Merging statistics are meaningless without measurement uncertainties.
    assert (i_obs.sigmas() is not None)
    info = i_obs.info()
    sigma_filtering = get_filtering_convention(i_obs, sigma_filtering)
    if (crystal_symmetry is None):
        assert (i_obs.space_group() is not None)
        crystal_symmetry = i_obs.crystal_symmetry()
    self.crystal_symmetry = crystal_symmetry
    i_obs = i_obs.customized_copy(
        crystal_symmetry=crystal_symmetry).set_info(info)
    # This class requires unmerged observations; refuse merged input.
    if (assert_is_not_unique_set_under_symmetry and
            i_obs.is_unique_set_under_symmetry()):
        raise Sorry(("The data in %s are already merged. Only unmerged (but " +
                     "scaled) data may be used in this program.") %
                    i_obs.info().label_string())
    # Loosen the cutoffs slightly so reflections exactly at the limit are
    # kept despite floating-point rounding.
    d_min_cutoff = d_min
    d_max_cutoff = d_max
    if (d_min is not None):
        d_min_cutoff *= (1 - d_min_tolerance)
        if (d_max is not None):
            assert (d_max > d_min)
    if (d_max is not None):
        d_max_cutoff *= 1 + d_min_tolerance
    i_obs = i_obs.resolution_filter(d_min=d_min_cutoff,
                                    d_max=d_max_cutoff).set_info(info)
    if (i_obs.size() == 0):
        raise Sorry("No reflections left after applying resolution cutoffs.")
    i_obs.show_summary(f=log)
    self.anom_extra = ""
    if (not anomalous):
        # Collapse Friedel pairs when anomalous statistics are not wanted.
        i_obs = i_obs.customized_copy(anomalous_flag=False).set_info(info)
        self.anom_extra = " (non-anomalous)"
    overall_d_max_min = None
    # eliminate_sys_absent() before setting up binner to ensure consistency
    # between reported overall d_min/max and d_min/max for resolution bins"
    if eliminate_sys_absent:
        i_obs = i_obs.eliminate_sys_absent()
    if extend_d_max_min:
        # Bin over the full requested range, not just the observed range.
        i_obs.setup_binner(n_bins=n_bins,
                           d_max=d_max_cutoff,
                           d_min=d_min_cutoff)
        overall_d_max_min = d_max_cutoff, d_min_cutoff
    else:
        if binning_method == 'volume':
            i_obs.setup_binner(n_bins=n_bins)
        elif binning_method == 'counting_sorted':
            i_obs.setup_binner_counting_sorted(n_bins=n_bins)
    # Overall statistics across the entire resolution range.
    self.overall = merging_stats(
        i_obs,
        d_max_min=overall_d_max_min,
        model_arrays=model_arrays,
        anomalous=anomalous,
        debug=debug,
        sigma_filtering=sigma_filtering,
        use_internal_variance=use_internal_variance,
        cc_one_half_significance_level=cc_one_half_significance_level,
        cc_one_half_method=cc_one_half_method)
    self.bins = []
    # Build the plottable table layout; column indices below refer to the
    # positions produced by merging_stats.table_data().
    title = "Intensity merging statistics"
    column_labels = [
        "1/d**2", "N(obs)", "N(unique)", "Redundancy", "Completeness",
        "Mean(I)", "Mean(I/sigma)", "R-merge", "R-meas", "R-pim", "CC1/2",
        "CC(anom)"
    ]
    graph_names = [
        "Reflection counts", "Redundancy", "Completeness", "Mean(I)",
        "Mean(I/sigma)", "R-factors", "CC1/2", "CC(anom)"
    ]
    graph_columns = [[0, 1, 2], [0, 3], [0, 4], [0, 5], [0, 6],
                     [0, 7, 8, 9], [0, 10], [0, 13]]
    if cc_one_half_significance_level is not None:
        column_labels.extend(["CC1/2 significance", "CC1/2 critical value"])
        graph_names.extend(["CC1/2 significance", "CC1/2 critical value"])
        # Extend the CC1/2 graph with the critical-value column.
        graph_columns[-2] = [0, 10, 12]
    #--- CC* mode
    if (model_arrays is not None):
        title = "Model quality and intensity merging statistics"
        column_labels.extend(["CC*", "CC(work)", "CC(free)", "R-work",
                              "R-free"])
        graph_names.extend(["CC*", "Model R-factors"])
        graph_columns.extend([[0, 11, 14, 15], [0, 16, 17]])
    #---
    self.table = data_plots.table_data(title=title,
                                       column_labels=column_labels,
                                       graph_names=graph_names,
                                       graph_columns=graph_columns,
                                       x_is_inverse_d_min=True,
                                       force_exact_x_labels=True)
    last_bin = None
    # Per-bin statistics: one merging_stats per resolution shell, each also
    # appended as a row of the table.
    for bin in i_obs.binner().range_used():
        sele_unmerged = i_obs.binner().selection(bin)
        bin_stats = merging_stats(
            i_obs.select(sele_unmerged),
            d_max_min=i_obs.binner().bin_d_range(bin),
            model_arrays=model_arrays,
            anomalous=anomalous,
            debug=debug,
            sigma_filtering=sigma_filtering,
            use_internal_variance=use_internal_variance,
            cc_one_half_significance_level=cc_one_half_significance_level,
            cc_one_half_method=cc_one_half_method)
        self.bins.append(bin_stats)
        self.table.add_row(bin_stats.table_data())
    from scitbx.array_family import flex
    # Overall CC1/2 values as the reflection-count-weighted mean of the
    # per-bin values, for both the half-dataset and sigma-tau estimators.
    self.cc_one_half_overall = flex.mean_weighted(
        flex.double(b.cc_one_half for b in self.bins),
        flex.double(b.cc_one_half_n_refl for b in self.bins))
    self.cc_one_half_sigma_tau_overall = flex.mean_weighted(
        flex.double(b.cc_one_half_sigma_tau for b in self.bins),
        flex.double(b.cc_one_half_sigma_tau_n_refl for b in self.bins))
def occupancy_weighted_average_b_factor(atoms):
    """Return the mean B-factor of *atoms*, weighted by atomic occupancy."""
    b_values = atoms.extract_b()
    occupancies = atoms.extract_occ()
    return flex.mean_weighted(b_values, occupancies)
def weighted_error_between(left, right, weights):
    """Return the weighted mean of the squared differences between
    *left* and *right*.

    The element-wise difference is squared via its dot product with itself
    and then averaged using *weights*.
    """
    delta = left - right
    squared_delta = delta.dot(delta)
    return flex.mean_weighted(squared_delta, weights)