def run(files, max_dz=2.5, max_dxy=15.):
    """
    Remove from the second XDS_ASCII file every reflection that lies close
    to a reflection of the first file, then report statistics on and write
    out the remaining (non-overlapping) reflections.

    Parameters:
      files   -- sequence of exactly two XDS_ASCII file names.  The first
                 one supplies the reference positions, the second one is
                 the data set being filtered.
      max_dz  -- a neighbour counts as overlapping only if |zd - zd'| is
                 strictly below this value (default 2.5, the previously
                 hard-coded threshold).
      max_dxy -- a neighbour counts as overlapping only if its distance in
                 the (xd, yd) plane is strictly below this value (default
                 15, the previously hard-coded threshold).

    Only the three nearest neighbours (Manhattan metric, p=1) of each
    reflection are examined.

    Side effects: prints every overlapping reflection together with its
    neighbours, prints the overlap percentage and the data set statistics
    to stdout, and writes "<basename of files[1]>_novl.mtz" (column root
    label IMEAN) to the current working directory.

    NOTE(review): xd/yd/zd are presumably the detector position and frame
    number columns of XDS_ASCII -- confirm against xds_ascii.XDS_ASCII.
    """
    assert len(files) == 2

    hkl1 = xds_ascii.XDS_ASCII(files[0], sys.stdout)
    hkl2 = xds_ascii.XDS_ASCII(files[1], sys.stdout)

    hkl1_points = numpy.column_stack((hkl1.xd, hkl1.yd, hkl1.zd))
    tree1 = spatial.cKDTree(hkl1_points)
    n_points1 = len(hkl1_points)
    max_dxy_sq = max_dxy ** 2

    n_nonovl = 0
    novl_indices = flex.miller_index()
    novl_i = flex.double()
    novl_sigma = flex.double()

    for i, hkl in enumerate(hkl2.indices):
        x, y, z = hkl2.xd[i], hkl2.yd[i], hkl2.zd[i]

        dists, idxs = tree1.query((x, y, z), k=3, p=1)

        overlaps = []
        for dist, idx in zip(dists, idxs):
            idx = int(idx)
            # cKDTree.query pads the result with index == n (and an
            # infinite distance) when the tree holds fewer than k points;
            # such entries are not real neighbours and must not be used
            # as an index into hkl1.
            if idx >= n_points1:
                continue
            xo, yo, zo = hkl1.xd[idx], hkl1.yd[idx], hkl1.zd[idx]
            if abs(z - zo) < max_dz and (xo - x) ** 2 + (yo - y) ** 2 < max_dxy_sq:
                overlaps.append((dist, idx))

        if not overlaps:
            novl_indices.append(hkl)
            novl_i.append(hkl2.iobs[i])
            novl_sigma.append(hkl2.sigma_iobs[i])
            n_nonovl += 1
            continue

        # Report the overlapping reflection followed by each neighbour.
        print("%s %s %s %s" % (hkl, x, y, z))
        for dist, idx in overlaps:
            xo, yo, zo = hkl1.xd[idx], hkl1.yd[idx], hkl1.zd[idx]
            print("%s %s %s %s" % (hkl1.indices[idx], xo, yo, zo))
            print("%s %s" % (dist, idx))
            print("")
        print("")

    n_ref = len(hkl2.indices)
    # Guard against an empty second file instead of dividing by zero.
    overlap_percent = 100. * (n_ref - n_nonovl) / n_ref if n_ref else 0.
    print("%.2f%% overlap!" % overlap_percent)

    novl_array = miller.array(
        miller_set=miller.set(crystal_symmetry=hkl2.symm, indices=novl_indices),
        data=novl_i,
        sigmas=novl_sigma)

    stats = dataset_statistics(novl_array, anomalous=False, sigma_filtering="xds")
    stats.show(out=sys.stdout)

    # Merge the surviving reflections (non-anomalous, sigma >= 0 only)
    # before writing them out.
    novl_array = novl_array.customized_copy(anomalous_flag=False).map_to_asu()
    novl_array = novl_array.eliminate_sys_absent()
    novl_array = novl_array.select(novl_array.sigmas() >= 0)

    filtr = filter_intensities_by_sigma(novl_array, "xds")
    hklout = os.path.splitext(os.path.basename(files[1]))[0] + "_novl.mtz"
    merged = filtr.array_merged.set_observation_type_xray_intensity()
    merged.as_mtz_dataset(column_root_label="IMEAN").mtz_object().write(hklout)
def xds2mtzmulti(xds_file, dir_name, hklout=None, dmin=None, dmax=None, force_anomalous=False):
    """
    Merge the intensities of an XDS_ASCII file and write them, together
    with their redundancies, to an MTZ file inside dir_name.

    Parameters:
      xds_file        -- path of the XDS_ASCII file to read.
      dir_name        -- directory the MTZ file is written to.
      hklout          -- output file name; defaults to the base name of
                         xds_file with its extension replaced by
                         "_multi.mtz".
      dmin, dmax      -- resolution limits handed to resolution_filter().
      force_anomalous -- when true, i_obs() is asked for anomalous data
                         (anomalous_flag=True); otherwise None is passed.

    Raises:
      Exception -- with args (output path, "already exists.") when the
                   output file is already present; nothing is read or
                   written in that case.

    The MTZ file carries the merged intensities under the column root
    label "I" and the redundancies under "MULT".
    """
    if hklout is None:
        stem = os.path.splitext(os.path.basename(xds_file))[0]
        hklout = stem + "_multi.mtz"

    out_path = os.path.join(dir_name, hklout)

    # Never overwrite an existing output file.
    if os.path.isfile(out_path):
        raise Exception(out_path, "already exists.")

    from yamtbx.dataproc.xds import xds_ascii
    from iotbx import merging_statistics

    anomalous_flag = None
    if force_anomalous:
        anomalous_flag = True

    reader = xds_ascii.XDS_ASCII(xds_file)
    intensities = reader.i_obs(anomalous_flag=anomalous_flag)
    intensities = intensities.resolution_filter(d_min=dmin, d_max=dmax)
    # Keep strictly positive sigmas only.
    intensities = intensities.select(intensities.sigmas() > 0)

    sigma_filtered = merging_statistics.filter_intensities_by_sigma(
        intensities, sigma_filtering="xds")
    merged = sigma_filtered.merge

    dataset = merged.array().as_mtz_dataset(column_root_label="I")
    dataset.add_miller_array(miller_array=merged.redundancies(),
                             column_root_label="MULT")
    dataset.mtz_object().write(file_name=out_path)
def merging_and_model_statistics(f_obs, f_model, r_free_flags, unmerged_i_obs,
                                 n_bins=20, sigma_filtering=Auto,
                                 anomalous=False, use_internal_variance=True):
    """
    Compute merging statistics - CC* in particular - together with measures
    of model quality using reciprocal-space data (R-factors and CCs).  See
    Karplus & Diederichs 2012 for rationale.

    Parameters:
      f_obs, f_model  -- observed and model amplitude arrays.
      r_free_flags    -- array whose True entries select the free set.
      unmerged_i_obs  -- unmerged intensities; must carry sigmas and an
                         info() object.
      n_bins, anomalous, use_internal_variance
                      -- passed through to dataset_statistics().
      sigma_filtering -- passed to filter_intensities_by_sigma().

    Returns the merging_statistics.dataset_statistics object.

    Raises Sorry when the R-free array does not select any reflection.
    """
    from iotbx import merging_statistics

    # very important: must use original intensities for i_obs, not squared
    # f_obs, because French-Wilson treatment is one-way
    assert (unmerged_i_obs.sigmas() is not None)
    original_info = unmerged_i_obs.info()
    assert (original_info is not None)

    free_sel = r_free_flags

    # Take the symmetry from f_obs and drop negative sigmas; the info
    # object is re-attached after the selection.
    unmerged_i_obs = unmerged_i_obs.customized_copy(crystal_symmetry=f_obs)
    non_negative = unmerged_i_obs.sigmas() >= 0
    unmerged_i_obs = unmerged_i_obs.select(non_negative).set_info(original_info)

    sigma_filter = merging_statistics.filter_intensities_by_sigma(
        array=unmerged_i_obs,
        sigma_filtering=sigma_filtering)
    i_obs = sigma_filter.array_merged
    unmerged_i_obs = sigma_filter.array

    def _match_anomalous(array):
        # Average Bijvoet pairs when a non-anomalous result is requested,
        # generate them when an anomalous result is requested; arrays that
        # already agree with the request are returned untouched.
        if anomalous:
            if not array.anomalous_flag():
                return array.generate_bijvoet_mates()
            return array
        if array.anomalous_flag():
            return array.average_bijvoet_mates()
        return array

    i_obs = _match_anomalous(i_obs)
    f_obs = _match_anomalous(f_obs)
    f_model = _match_anomalous(f_model)
    free_sel = _match_anomalous(free_sel)

    if (free_sel.data().count(True) == 0):
        raise Sorry(
            "R-free array does not select any reflections. To calculate "
            "CC* and related statistics, a valid set of R-free flags must be used.")

    work_sel = free_sel.customized_copy(data=~free_sel.data())

    # Successively restrict i_obs to the reflections shared with each of
    # the other arrays (the order of these calls is significant).
    i_obs, f_obs = i_obs.common_sets(other=f_obs)
    i_obs, f_model = i_obs.common_sets(other=f_model)
    i_obs, work_sel = i_obs.common_sets(other=work_sel)
    i_obs, free_sel = i_obs.common_sets(other=free_sel)

    i_calc = abs(f_model).f_as_f_sq()
    d_max, d_min = i_calc.d_max_min()

    model_arrays = merging_statistics.model_based_arrays(
        f_obs=f_obs,
        i_obs=i_obs,
        i_calc=i_calc,
        work_sel=work_sel,
        free_sel=free_sel)

    return merging_statistics.dataset_statistics(
        i_obs=unmerged_i_obs,
        crystal_symmetry=i_calc,
        d_min=d_min,
        d_max=d_max,
        n_bins=n_bins,
        model_arrays=model_arrays,
        anomalous=anomalous,
        use_internal_variance=use_internal_variance,
        sigma_filtering=None)  # no need, since it was done here
def run(files, max_dz=2.5, max_dxy=15.):
    """
    Remove from the second XDS_ASCII file every reflection that lies close
    to a reflection of the first file, then report statistics on and write
    out the remaining (non-overlapping) reflections.

    Parameters:
      files   -- sequence of exactly two XDS_ASCII file names.  The first
                 one supplies the reference positions, the second one is
                 the data set being filtered.
      max_dz  -- a neighbour counts as overlapping only if |zd - zd'| is
                 strictly below this value (default 2.5, the previously
                 hard-coded threshold).
      max_dxy -- a neighbour counts as overlapping only if its distance in
                 the (xd, yd) plane is strictly below this value (default
                 15, the previously hard-coded threshold).

    Only the three nearest neighbours (Manhattan metric, p=1) of each
    reflection are examined.

    Side effects: prints every overlapping reflection together with its
    neighbours, prints the overlap percentage and the data set statistics
    to stdout, and writes "<basename of files[1]>_novl.mtz" (column root
    label IMEAN) to the current working directory.

    NOTE(review): xd/yd/zd are presumably the detector position and frame
    number columns of XDS_ASCII -- confirm against xds_ascii.XDS_ASCII.
    """
    assert len(files) == 2

    hkl1 = xds_ascii.XDS_ASCII(files[0], sys.stdout)
    hkl2 = xds_ascii.XDS_ASCII(files[1], sys.stdout)

    hkl1_points = numpy.column_stack((hkl1.xd, hkl1.yd, hkl1.zd))
    tree1 = spatial.cKDTree(hkl1_points)
    n_points1 = len(hkl1_points)
    max_dxy_sq = max_dxy ** 2

    n_nonovl = 0
    novl_indices = flex.miller_index()
    novl_i = flex.double()
    novl_sigma = flex.double()

    for i, hkl in enumerate(hkl2.indices):
        x, y, z = hkl2.xd[i], hkl2.yd[i], hkl2.zd[i]

        dists, idxs = tree1.query((x, y, z), k=3, p=1)

        overlaps = []
        for dist, idx in zip(dists, idxs):
            idx = int(idx)
            # cKDTree.query pads the result with index == n (and an
            # infinite distance) when the tree holds fewer than k points;
            # such entries are not real neighbours and must not be used
            # as an index into hkl1.
            if idx >= n_points1:
                continue
            xo, yo, zo = hkl1.xd[idx], hkl1.yd[idx], hkl1.zd[idx]
            if abs(z - zo) < max_dz and (xo - x) ** 2 + (yo - y) ** 2 < max_dxy_sq:
                overlaps.append((dist, idx))

        if not overlaps:
            novl_indices.append(hkl)
            novl_i.append(hkl2.iobs[i])
            novl_sigma.append(hkl2.sigma_iobs[i])
            n_nonovl += 1
            continue

        # Report the overlapping reflection followed by each neighbour.
        print("%s %s %s %s" % (hkl, x, y, z))
        for dist, idx in overlaps:
            xo, yo, zo = hkl1.xd[idx], hkl1.yd[idx], hkl1.zd[idx]
            print("%s %s %s %s" % (hkl1.indices[idx], xo, yo, zo))
            print("%s %s" % (dist, idx))
            print("")
        print("")

    n_ref = len(hkl2.indices)
    # Guard against an empty second file instead of dividing by zero.
    overlap_percent = 100. * (n_ref - n_nonovl) / n_ref if n_ref else 0.
    print("%.2f%% overlap!" % overlap_percent)

    novl_array = miller.array(
        miller_set=miller.set(crystal_symmetry=hkl2.symm, indices=novl_indices),
        data=novl_i,
        sigmas=novl_sigma)

    stats = dataset_statistics(novl_array, anomalous=False, sigma_filtering="xds")
    stats.show(out=sys.stdout)

    # Merge the surviving reflections (non-anomalous, sigma >= 0 only)
    # before writing them out.
    novl_array = novl_array.customized_copy(anomalous_flag=False).map_to_asu()
    novl_array = novl_array.eliminate_sys_absent()
    novl_array = novl_array.select(novl_array.sigmas() >= 0)

    filtr = filter_intensities_by_sigma(novl_array, "xds")
    hklout = os.path.splitext(os.path.basename(files[1]))[0] + "_novl.mtz"
    merged = filtr.array_merged.set_observation_type_xray_intensity()
    merged.as_mtz_dataset(column_root_label="IMEAN").mtz_object().write(hklout)
def merging_and_model_statistics(f_obs, f_model, r_free_flags, unmerged_i_obs,
                                 n_bins=20, sigma_filtering=Auto,
                                 anomalous=False, use_internal_variance=True):
    """
    Compute merging statistics - CC* in particular - together with measures
    of model quality using reciprocal-space data (R-factors and CCs).  See
    Karplus & Diederichs 2012 for rationale.

    Parameters:
      f_obs, f_model  -- observed and model amplitude arrays.
      r_free_flags    -- array whose True entries select the free set.
      unmerged_i_obs  -- unmerged intensities; must carry sigmas and an
                         info() object.
      n_bins, anomalous, use_internal_variance
                      -- passed through to dataset_statistics().
      sigma_filtering -- passed to filter_intensities_by_sigma().

    Returns the merging_statistics.dataset_statistics object.

    Raises Sorry when the R-free array does not select any reflection.
    """
    from iotbx import merging_statistics

    # very important: must use original intensities for i_obs, not squared
    # f_obs, because French-Wilson treatment is one-way
    assert (unmerged_i_obs.sigmas() is not None)
    original_info = unmerged_i_obs.info()
    assert (original_info is not None)

    free_sel = r_free_flags

    # Take the symmetry from f_obs and drop negative sigmas; the info
    # object is re-attached after the selection.
    unmerged_i_obs = unmerged_i_obs.customized_copy(crystal_symmetry=f_obs)
    non_negative = unmerged_i_obs.sigmas() >= 0
    unmerged_i_obs = unmerged_i_obs.select(non_negative).set_info(original_info)

    sigma_filter = merging_statistics.filter_intensities_by_sigma(
        array=unmerged_i_obs,
        sigma_filtering=sigma_filtering)
    i_obs = sigma_filter.array_merged
    unmerged_i_obs = sigma_filter.array

    def _match_anomalous(array):
        # Average Bijvoet pairs when a non-anomalous result is requested,
        # generate them when an anomalous result is requested; arrays that
        # already agree with the request are returned untouched.
        if anomalous:
            if not array.anomalous_flag():
                return array.generate_bijvoet_mates()
            return array
        if array.anomalous_flag():
            return array.average_bijvoet_mates()
        return array

    i_obs = _match_anomalous(i_obs)
    f_obs = _match_anomalous(f_obs)
    f_model = _match_anomalous(f_model)
    free_sel = _match_anomalous(free_sel)

    if (free_sel.data().count(True) == 0):
        raise Sorry(
            "R-free array does not select any reflections. To calculate "
            "CC* and related statistics, a valid set of R-free flags must be used.")

    work_sel = free_sel.customized_copy(data=~free_sel.data())

    # Successively restrict i_obs to the reflections shared with each of
    # the other arrays (the order of these calls is significant; here
    # f_model is matched before f_obs).
    i_obs, f_model = i_obs.common_sets(other=f_model)
    i_obs, f_obs = i_obs.common_sets(other=f_obs)
    i_obs, work_sel = i_obs.common_sets(other=work_sel)
    i_obs, free_sel = i_obs.common_sets(other=free_sel)

    i_calc = abs(f_model).f_as_f_sq()
    d_max, d_min = i_calc.d_max_min()

    model_arrays = merging_statistics.model_based_arrays(
        f_obs=f_obs,
        i_obs=i_obs,
        i_calc=i_calc,
        work_sel=work_sel,
        free_sel=free_sel)

    return merging_statistics.dataset_statistics(
        i_obs=unmerged_i_obs,
        crystal_symmetry=i_calc,
        d_min=d_min,
        d_max=d_max,
        n_bins=n_bins,
        model_arrays=model_arrays,
        anomalous=anomalous,
        use_internal_variance=use_internal_variance,
        sigma_filtering=None)  # no need, since it was done here