def find_merge_common_images(args):
  phil = iotbx.phil.process_command_line(args=args,
                                         master_string=master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "
      "set_average_unit_cell=True.")

  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  uc = work_params.target_unit_cell

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  print 'Miller set size: %d' % len(miller_set.indices())

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)

  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  def nint(a):
    return int(round(a))

  from collections import defaultdict

  i_scale = 0.1
  i_hist = defaultdict(int)

  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    for i in intensi[s:e]:
      i_hist[nint(i_scale * i)] += 1
    isig = sum(i / sig for i, sig in zip(intensi[s:e],
                                         sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  fout = open('i_hist.dat', 'w')
  for i in i_hist:
    fout.write('%.2f %d\n' % (i / i_scale, i_hist[i]))
  fout.close()

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  frames = []

  odd = 0
  even = 0

  for s, e in zip(starts, ends):
    for x in range(s, e):
      hkl = lookup[hkl_asu[x]]
      if (hkl[0] + hkl[1] + hkl[2]) % 2 == 1:
        odd += 1
      else:
        even += 1
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, intensities, sigmas))

  # pre-scale the data - first determine average ln(k), B; then apply

  kbs = [f.kb() for f in frames]

  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

  n_lt_500 = 0
  n_gt_500 = 0

  for j, f in enumerate(frames):
    s_i = f.scale_to_kb(mn_k, mn_B)
    fout = open('frame-s-i-%05d.dat' % j, 'w')
    for s, i, si in s_i:
      fout.write('%f %f %f\n' % (s, i, si))
      if i < 500:
        n_lt_500 += 1
      else:
        n_gt_500 += 1
    fout.close()

  hist = defaultdict(int)

  fout = open('kb.dat', 'w')
  for j, f in enumerate(frames):
    kb = f.kb()
    fout.write('%4d %6.3f %6.3f\n' % (j, kb[0], kb[1]))
    hist[int(round(kb[1]))] += 1
  fout.close()

  for b in sorted(hist):
    print b, hist[b]

  print odd, even
  print n_lt_500, n_gt_500

  return
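# The start / end bookkeeping above recurs in every variant of these
# routines, so a minimal self-contained sketch of the same segmentation
# logic may help.  frame_boundaries() is an illustrative helper, not part
# of the xfel API, and assumes observations arrive sorted by frame id.

def frame_boundaries(frame_ids):
  '''Return (start, end) pairs delimiting runs of equal frame id,
  suitable for Python range indexing, i.e. observations[s:e].'''
  starts = [0]
  ends = []
  for x in xrange(1, len(frame_ids)):
    if frame_ids[x] != frame_ids[x - 1]:
      ends.append(x)
      starts.append(x)
  ends.append(len(frame_ids))
  return zip(starts, ends)

# e.g. frame_boundaries([7, 7, 7, 9, 9, 12]) == [(0, 3), (3, 5), (5, 6)]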
def run(args):
  phil = iotbx.phil.process_command_line(args=args,
                                         master_string=master_phil)
  work_params = phil.work.extract()
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage("If rescale_with_average_cell=True, you must also specify "
                "set_average_unit_cell=True.")

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params)

  scaler.read_all()
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()

  rational_ops = []
  for symop in pg:
    rational_ops.append((matrix.sqr(symop.r().transpose().as_rational()),
                         symop.r().as_hkl()))

  # miller_set.show_summary()

  uc = work_params.target_unit_cell

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]

  from cctbx.miller import map_to_asu
  sgtype = miller_set.space_group_info().type()
  aflag = miller_set.anomalous_flag()
  from cctbx.array_family import flex

  # FIXME in here perform the mapping to ASU for both the original and other
  # index as an array-wise manipulation to make things a bunch faster...
  # however this also uses a big chunk of RAM... FIXME also in here use
  # cb_op.apply(indices) to get the indices reindexed...

  original_indices = flex.miller_index()
  for x in xrange(len(scaler.observations["hkl_id"])):
    original_indices.append(lookup[hkl_asu[x]])

  from cctbx.sgtbx import change_of_basis_op

  I23 = change_of_basis_op('k, -h, l')

  other_indices = I23.apply(original_indices)

  map_to_asu(sgtype, aflag, original_indices)
  map_to_asu(sgtype, aflag, other_indices)

  # FIXME would be useful in here to have a less expensive way of finding the
  # symmetry operation which gave the map to the ASU - perhaps best way is to
  # make a new C++ map_to_asu which records this.

  # FIXME in here recover the original frame structure of the data to
  # logical frame objects - N.B. the frame will need to be augmented to test
  # alternative indexings

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  for j, se in enumerate(zip(starts, ends)):
    print 'processing frame %d: %d to %d' % (j, se[0], se[1])
    s, e = se
    isig = sum(i / sig for i, sig in zip(intensi[s:e],
                                         sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  # then start running the comparison code

  frames = []

  for s, e in zip(starts, ends):
    # FIXME need this from remap to ASU
    misym = [0 for x in range(s, e)]
    indices = [original_indices[x] for x in range(s, e)]
    other = [other_indices[x] for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, other, intensities, sigmas))

  reference = FrameFromReferenceMTZ()

  fout = open('cc_reference.log', 'w')

  for j, f in enumerate(frames):
    _cc = reference.cc(f)
    _oo = reference.cc_other(f)
    print '%d %d %d %d %f %d %f' % (j, starts[j], ends[j],
                                    _cc[0], _cc[1], _oo[0], _oo[1])
    fout.write('%d %d %d %d %f %d %f\n' % (j, starts[j], ends[j],
                                           _cc[0], _cc[1], _oo[0], _oo[1]))

  fout.close()

  return
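# reference.cc(f) / reference.cc_other(f) above return (n, cc) pairs; a
# plausible reading is a plain Pearson correlation of intensities over the
# Miller indices the two sets share.  This sketch is an assumption about
# that behaviour, not the Frame implementation itself;
# correlation_on_common() is a hypothetical helper.

import math

def correlation_on_common(ref, frm):
  '''ref, frm: dicts mapping hkl tuple -> intensity.  Returns (n, cc)
  over the indices present in both.'''
  common = [hkl for hkl in frm if hkl in ref]
  n = len(common)
  if n < 2:
    return n, 0.0
  xs = [ref[hkl] for hkl in common]
  ys = [frm[hkl] for hkl in common]
  mx = sum(xs) / n
  my = sum(ys) / n
  sxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
  sxx = sum((x - mx) ** 2 for x in xs)
  syy = sum((y - my) ** 2 for y in ys)
  if sxx == 0 or syy == 0:
    return n, 0.0
  return n, sxy / math.sqrt(sxx * syy)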
def find_merge_common_images(args):
  phil = iotbx.phil.process_command_line(args=args,
                                         master_string=master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "
      "set_average_unit_cell=True.")

  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  uc = work_params.target_unit_cell

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)

  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    isig = sum(i / sig for i, sig in zip(intensi[s:e],
                                         sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  frames = []

  for s, e in zip(starts, ends):
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, intensities, sigmas))

  cycle = 0

  total_nref = sum([len(f.get_indices()) for f in frames])

  # pre-scale the data - first determine average ln(k), B; then apply

  kbs = [f.kb() for f in frames]

  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

  for f in frames:
    f.scale_to_kb(mn_k, mn_B)

  while True:
    print 'Analysing %d frames' % len(frames)
    print 'Cycle %d' % cycle

    cycle += 1

    print 'Power spectrum'
    fn = frame_numbers(frames)
    for j in sorted(fn):
      print '%4d %4d' % (j, fn[j])

    nref_cycle = sum([len(f.get_indices()) for f in frames])
    assert nref_cycle == total_nref

    import numpy  # local import, as in the sibling run() below
    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype=numpy.short)

    obs = {}

    from cctbx.sgtbx import rt_mx, change_of_basis_op
    oh = change_of_basis_op(rt_mx('h,l,k'))

    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if _i not in obs:
          obs[_i] = []
        obs[_i].append(j)

    # work through unique observations ignoring those which include no
    # hand information

    for hkl in obs:
      if hkl == oh.apply(hkl):
        continue
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          common_reflections[(f1, f2)] += 1

    cmn_rfl_list = []

    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if common_reflections[(f1, f2)] > 20:
          cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))

    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()

    joins = []
    used = []

    for n, f1, f2 in cmn_rfl_list:
      if f1 in used or f2 in used:
        continue

      _cc = frames[f1].cc(frames[f2])

      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascade correctly.

      # p-value very small for cc > 0.75 for > 20 observations - necessary
      # as will be correlated due to Wilson curves

      if _cc[0] > 20 and _cc[1] > 0.75:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        # used.append(f1)
        used.append(f2)

    if not joins:
      print 'No pairs found'
      break

    joins.sort()
    joins.reverse()

    for j2, j1 in joins:
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)

    continue

  frames.sort()

  print 'Biggest few: #frames; #unique refl'
  j = -1
  while frames[j].get_frames() > 1:
    print frames[j].get_frames(), frames[j].get_unique_indices()
    j -= 1

  return
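# The pre-scaling step above relies on Frame.kb() to fit a per-frame
# Wilson-like scale, presumably ln(I) ~ ln(k) - 2 * B * s^2 with
# s = sin(theta) / lambda.  A sketch of such a fit by simple linear least
# squares, under that assumption; fit_kb() is illustrative, not the
# actual Frame method.

def fit_kb(s_sq, intensities):
  '''Fit ln(I) = ln(k) - 2 * B * s2 over observations with I > 0;
  returns (ln_k, B).  Assumes at least two distinct s2 values.'''
  import math
  data = [(s2, math.log(i)) for s2, i in zip(s_sq, intensities) if i > 0]
  n = len(data)
  sx = sum(s2 for s2, _l in data)
  sy = sum(l for _s2, l in data)
  sxx = sum(s2 * s2 for s2, _l in data)
  sxy = sum(s2 * l for s2, l in data)
  slope = (n * sxy - sx * sy) / (n * sxx - sx * sx)
  ln_k = (sy - slope * sx) / n
  return ln_k, -0.5 * slope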
def run(args):
  phil = iotbx.phil.process_command_line(args=args,
                                         master_string=master_phil)
  work_params = phil.work.extract()
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage("If rescale_with_average_cell=True, you must also specify "
                "set_average_unit_cell=True.")

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params)

  scaler.read_all()
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()

  rational_ops = []
  for symop in pg:
    rational_ops.append((matrix.sqr(symop.r().transpose().as_rational()),
                         symop.r().as_hkl()))

  # miller_set.show_summary()

  uc = work_params.target_unit_cell

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]

  from cctbx.miller import map_to_asu
  sgtype = miller_set.space_group_info().type()
  aflag = miller_set.anomalous_flag()
  from cctbx.array_family import flex

  # FIXME in here perform the mapping to ASU for both the original and other
  # index as an array-wise manipulation to make things a bunch faster...
  # however this also uses a big chunk of RAM... FIXME also in here use
  # cb_op.apply(indices) to get the indices reindexed...

  original_indices = flex.miller_index()
  for x in xrange(len(scaler.observations["hkl_id"])):
    original_indices.append(lookup[hkl_asu[x]])

  from cctbx.sgtbx import change_of_basis_op

  I23 = change_of_basis_op('k, -h, l')

  other_indices = I23.apply(original_indices)

  map_to_asu(sgtype, aflag, original_indices)
  map_to_asu(sgtype, aflag, other_indices)

  # FIXME would be useful in here to have a less expensive way of finding the
  # symmetry operation which gave the map to the ASU - perhaps best way is to
  # make a new C++ map_to_asu which records this.

  # FIXME in here recover the original frame structure of the data to
  # logical frame objects - N.B. the frame will need to be augmented to test
  # alternative indexings

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  for j, se in enumerate(zip(starts, ends)):
    print 'processing frame %d: %d to %d' % (j, se[0], se[1])
    s, e = se
    isig = sum(i / sig for i, sig in zip(intensi[s:e],
                                         sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  # then start running the comparison code

  frames = []

  for s, e in zip(starts, ends):
    # FIXME need this from remap to ASU
    misym = [0 for x in range(s, e)]
    indices = [original_indices[x] for x in range(s, e)]
    other = [other_indices[x] for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, other, intensities, sigmas))

  cycle = 0

  total_nref = sum([len(f.get_indices()) for f in frames])

  # pre-scale the data - first determine average ln(k), B; then apply

  kbs = [f.kb() for f in frames]

  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

  for f in frames:
    f.scale_to_kb(mn_k, mn_B)

  while True:
    print 'Analysing %d frames' % len(frames)
    print 'Cycle %d' % cycle

    cycle += 1

    print 'Power spectrum'
    fn = frame_numbers(frames)
    for j in sorted(fn):
      print '%4d %4d' % (j, fn[j])

    nref_cycle = sum([len(f.get_indices()) for f in frames])
    assert nref_cycle == total_nref

    # first work on the original indices

    import numpy
    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype=numpy.short)

    obs = {}

    # for other hand add -j
    # (N.B. since -0 == 0, frame 0 carries no sign and is excluded by the
    # f1 * f2 sign tests below; see the sketch after this function)

    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if _i not in obs:
          obs[_i] = []
        obs[_i].append(j)

    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          if f1 * f2 > 0:
            common_reflections[(abs(f1), abs(f2))] += 1

    cmn_rfl_list = []

    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if common_reflections[(f1, f2)] > 10:
          cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))

    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()

    joins = []
    used = []

    for n, f1, f2 in cmn_rfl_list:
      if f1 in used or f2 in used:
        continue

      _cc = frames[f1].cc(frames[f2])

      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascade correctly.

      # p-value small (3% ish) for cc > 0.6 for > 10 observations -
      # necessary as will be correlated due to Wilson curves though
      # with B factor < 10 this is less of an issue

      if _cc[0] > 10 and _cc[1] > 0.6:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        used.append(f2)

    if not joins:
      print 'No pairs found'
      break

    joins.sort()
    joins.reverse()

    for j2, j1 in joins:
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)

    all_joins = [j for j in joins]

    # then do the same for the alternative indices

    other_reflections = numpy.zeros((len(frames), len(frames)),
                                    dtype=numpy.short)

    obs = {}

    # for other hand add -j

    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if _i not in obs:
          obs[_i] = []
        obs[_i].append(j)
      indices = set(f.get_other())
      for i in indices:
        _i = tuple(i)
        if _i not in obs:
          obs[_i] = []
        obs[_i].append(-j)

    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          if f1 * f2 < 0:
            other_reflections[(abs(f1), abs(f2))] += 1

    oth_rfl_list = []

    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if other_reflections[(f1, f2)] > 10:
          oth_rfl_list.append((other_reflections[(f1, f2)], f1, f2))

    joins = []

    oth_rfl_list.sort()
    oth_rfl_list.reverse()

    for n, f1, f2 in oth_rfl_list:
      if f1 in used or f2 in used:
        continue

      _cc = frames[f1].cc_other(frames[f2])

      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascade correctly.

      # p-value small (3% ish) for cc > 0.6 for > 10 observations -
      # necessary as will be correlated due to Wilson curves though
      # with B factor < 10 this is less of an issue

      if _cc[0] > 10 and _cc[1] > 0.6:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        used.append(f2)

    all_joins += joins

    if not all_joins:
      break

    joins.sort()
    joins.reverse()

    for j2, j1 in joins:
      frames[j2].reindex()
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)

    continue

  frames.sort()

  print 'Biggest few: #frames; #unique refl'
  j = -1
  while frames[j].get_frames() > 1:
    print frames[j].get_frames(), frames[j].get_unique_indices()
    frames[j].output_as_scalepack(sg, 'scalepack-%d.sca' % j)
    j -= 1

  return
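# The +j / -j encoding above uses the sign of the frame index to record
# whether an observation entered via the original or the alternative
# indexing, but -0 == 0, so frame 0 can never satisfy f1 * f2 < 0 (or
# > 0) and silently drops out of both counts.  A sketch of the same
# bookkeeping with a one-based offset that removes the ambiguity;
# encode_hand / decode_hand are illustrative helpers, not xfel API.

def encode_hand(j, other_hand):
  # frame j >= 0; the sign carries the hand, the +1 offset keeps zero
  # from losing its sign
  return -(j + 1) if other_hand else (j + 1)

def decode_hand(c):
  # returns (frame index, True if the alternative indexing was used)
  return abs(c) - 1, c < 0

# e.g. encode_hand(0, False) == 1 and encode_hand(0, True) == -1, so two
# observations of one hkl from frame 0 under opposite hands still give a
# negative product and are counted.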
def find_merge_common_images(args):
  phil = iotbx.phil.process_command_line(args=args,
                                         master_string=master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "
      "set_average_unit_cell=True.")

  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  uc = work_params.target_unit_cell

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)

  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    isig = sum(i / sig for i, sig in zip(intensi[s:e],
                                         sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  frames = []

  for s, e in zip(starts, ends):
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(uc, indices, intensities, sigmas))

  from collections import defaultdict
  hist = defaultdict(int)

  fout = open('common_oh.dat', 'w')
  for j, f in enumerate(frames):
    hp = f.hand_pairs()
    fout.write('%4d %d\n' % (j, hp))
    hist[int(hp)] += 1
  fout.close()

  for b in sorted(hist):
    print b, hist[b]

  return
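# Frame.hand_pairs() above is used to count, per frame, reflections whose
# alternative-indexing mate is also observed.  A sketch of one plausible
# reading, assuming the same 'h,l,k' relation used elsewhere in this
# file; count_hand_pairs() is an illustrative helper, not the Frame
# method.

def count_hand_pairs(indices):
  '''indices: iterable of (h, k, l) tuples from one frame.  Count pairs
  of reflections related by the (h, l, k) swap, excluding self-mates
  with k == l.'''
  seen = set(indices)
  n = 0
  for h, k, l in seen:
    if k == l:
      continue
    if (h, l, k) in seen:
      n += 1
  return n // 2  # each pair is found from both members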
def find_merge_common_images(args):
  phil = iotbx.phil.process_command_line(args=args,
                                         master_string=master_phil).show()
  work_params = phil.work.extract()
  if ("--help" in args):
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)):
    raise Usage(
      "If rescale_with_average_cell=True, you must also specify "
      "set_average_unit_cell=True.")

  # Read Nat's reference model from an MTZ file.  XXX The observation
  # type is given as F, not I--should they be squared?  Check with Nat!
  log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                  work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "Target unit cell and space group:"
  print >> out, " ", work_params.target_unit_cell
  print >> out, " ", work_params.target_space_group

  uc = work_params.target_unit_cell

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)

  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

  # ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)

  scaler.read_all()
  print "finished reading"
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  miller_set.show_summary()

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []

  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)

  ends.append(len(scaler.observations["hkl_id"]))

  keep_start = []
  keep_end = []

  for j, se in enumerate(zip(starts, ends)):
    s, e = se
    isig = sum(i / sig for i, sig in zip(intensi[s:e],
                                         sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  frames = []

  for s, e in zip(starts, ends):
    indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]
    frames.append(Frame(indices, intensities, sigmas))

  while True:
    print 'Analysing %d frames' % len(frames)

    import numpy  # local import, as in the sibling functions above
    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype=numpy.short)

    obs = {}

    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if _i not in obs:
          obs[_i] = []
        obs[_i].append(j)

    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          common_reflections[(f1, f2)] += 1

    cmn_rfl_list = []

    for f1 in range(len(frames)):
      # f1 + 1: exclude self-pairs, which would otherwise merge a frame
      # with itself (the later revisions above use f1 + 1 throughout)
      for f2 in range(f1 + 1, len(frames)):
        cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))

    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()

    joins = []

    for n, f1, f2 in cmn_rfl_list:
      _cc = frames[f1].cc(frames[f2])
      if _cc[0] > 20 and _cc[1] > 0.75:
        # print '===> %d %d' % (n, frames[f1].common(frames[f2]))
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f1, f2))

    if not joins:
      print 'No pairs found'
      break

    joins.sort()
    joins.reverse()

    # N.B. removing frames while later joins still refer to the old list
    # positions can mis-index; see the sketch after this function

    for j1, j2 in joins:
      frames[j1].merge(frames[j2])
      frames.remove(frames[j2])

    continue

  return
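# The join loop above mutates the frames list while the remaining joins
# still refer to the old positions.  A minimal sketch of a safer pattern:
# provided j1 < j2 in every pair and each index appears at most once as
# j2, deleting in descending j2 order leaves all remaining indices valid.
# merge_pairs() is an illustrative stand-in, not part of the Frame API.

def merge_pairs(frames, joins):
  '''joins: list of (j1, j2) with j1 < j2 and distinct j2 values; merge
  frame j2 into frame j1, then drop j2.  Deleting the largest j2 first
  keeps the smaller indices in the remaining pairs stable.'''
  for j1, j2 in sorted(joins, key=lambda jj: jj[1], reverse=True):
    frames[j1].merge(frames[j2])
    del frames[j2]
  return frames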