def cut_resolution(self, cycle_number):
    def est_resol(xscale_hkl, res_params, plt_out):
        iobs = XDS_ASCII(xscale_hkl, i_only=True).i_obs()
        est = estimate_resolution_based_on_cc_half(iobs, res_params.cc_one_half_min,
                                                   res_params.cc_half_tol, res_params.n_bins,
                                                   log_out=self.out)
        est.show_plot(False, plt_out)
        if None not in (est.d_min, est.cc_at_d_min):
            self.out.write("Best resolution cutoff= %.2f A @CC1/2= %.4f\n" % (est.d_min, est.cc_at_d_min))
        else:
            self.out.write("Can't decide resolution cutoff. No reflections??\n")
        return est.d_min
    # est_resol()

    print >>self.out, "**** Determining resolution cutoff in run_%.2d ****" % cycle_number

    last_wd = os.path.join(self.workdir_org, "run_%.2d" % cycle_number)
    xscale_hkl = os.path.abspath(os.path.join(last_wd, "xscale.hkl"))
    tmpwd = os.path.join(self.workdir_org, "run_%.2d_tmp" % cycle_number)
    os.mkdir(tmpwd)

    # Two passes: first with a relaxed CC1/2 threshold (70% of requested), then the requested one.
    for i, cc_cut in enumerate((self.res_params.cc_one_half_min*.7, self.res_params.cc_one_half_min)):
        self.res_params.cc_one_half_min = cc_cut
        d_min = est_resol(xscale_hkl, self.res_params, os.path.join(tmpwd, "ccfit_%d.pdf" % (i+1)))
        if d_min is not None and d_min > self.d_min + 0.001:
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(tmpwd, f))
            inp_new = os.path.join(tmpwd, "XSCALE.INP")
            shutil.copyfile(os.path.join(last_wd, "XSCALE.INP"), inp_new)
            modify_xdsinp(inp_new, [make_bin_str(d_min, self.d_max).split("= ")])
            try:
                xscale.run_xscale(inp_new, cbf_to_dat=True, aniso_analysis=True,
                                  use_tmpdir_if_available=self.xscale_params.use_tmpdir_if_available)
            except:
                print >>self.out, traceback.format_exc()
            xscale_hkl = os.path.abspath(os.path.join(tmpwd, "xscale.hkl"))

    if not os.path.isfile(os.path.join(tmpwd, "XSCALE.INP")):
        # Nothing was re-scaled; just link the results of the last run.
        for f in "XSCALE.INP", "XSCALE.LP", "xscale.hkl", "pointless.log", "ccp4":
            os.symlink(os.path.relpath(os.path.join(last_wd, f), tmpwd),
                       os.path.join(tmpwd, f))

    if d_min is not None:
        self.dmin_est_at_cycles[cycle_number] = d_min
        os.rename(tmpwd, os.path.join(self.workdir_org, "run_%.2d_%.2fA" % (cycle_number, d_min)))
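
# A standalone sketch of the CC1/2-based cutoff estimation driven by est_resol()
# above: read merged intensities and ask for the d_min at which CC1/2 drops below
# a threshold. The parameter values (0.5, 0.03, 9) are illustrative only;
# XDS_ASCII and estimate_resolution_based_on_cc_half are the same helpers this
# module already uses.

import sys

def estimate_cutoff_sketch(hklfile="xscale.hkl", cc_min=0.5, cc_tol=0.03, n_bins=9):
    iobs = XDS_ASCII(hklfile, i_only=True).i_obs()
    est = estimate_resolution_based_on_cc_half(iobs, cc_min, cc_tol, n_bins, log_out=sys.stdout)
    return est.d_min, est.cc_at_d_min  # both None if no reflections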
def run_dials_sequence(filename_template, prefix, nr_range, wdir, known_xs, overrides,
                       scan_varying, nproc):
    log_out = open(os.path.join(wdir, "dials_sequence.log"), "w")
    pointless_log = os.path.join(wdir, "pointless.log")

    # Prepare
    img_files = find_existing_files_in_template(filename_template, nr_range[0], nr_range[1],
                                                datadir=os.path.dirname(prefix),
                                                check_compressed=True)
    if len(img_files) == 0:
        mylog.error("No files found for %s %s" % (filename_template, nr_range))
        return

    nproc_str = "nproc=%d" % nproc

    log_out.write("Importing %s range=%s\n" % (img_files, nr_range))
    log_out.write(" Overrides: %s\n" % overrides)
    log_out.flush()

    override_str = ""  # TODO support other stuff.. (wavelength, distance, osc_range, rotation_axis,..)
    if "orgx" in overrides and "orgy" in overrides:
        override_str += "slow_fast_beam_centre=%.2f,%.2f " % (overrides["orgy"], overrides["orgx"])

    if len(img_files) == 1 and img_files[0].endswith(".h5"):
        util.call('dials.import "%s" %s image_range=%d,%d' % (img_files[0], override_str,
                                                              nr_range[0], nr_range[1]),
                  wdir=wdir, stdout=log_out,
                  expects_out=[os.path.join(wdir, "datablock.json")])
    else:
        util.call('dials.import %s template="%s" image_range=%d,%d' % (override_str,
                                                                       filename_template.replace("?", "#"),
                                                                       nr_range[0], nr_range[1]),
                  wdir=wdir, stdout=log_out,
                  expects_out=[os.path.join(wdir, "datablock.json")])

    util.call("dials.find_spots datablock.json filter.d_max=30 %s" % nproc_str,  # global_threshold=200
              wdir=wdir, stdout=log_out,
              expects_out=[os.path.join(wdir, "strong.pickle")])

    util.call("dials.export strong.pickle format=xds xds.directory=.",
              wdir=wdir, stdout=log_out)

    index_ok = False
    for index_meth in ("fft3d", "fft1d", "real_space_grid_search"):
        for index_assi in ("local", "simple"):
            if index_ok: break
            cmd = "dials.index datablock.json strong.pickle verbosity=3 "
            cmd += "indexing.method=%s index_assignment.method=%s " % (index_meth, index_assi)
            if known_xs is not None:  # not in (known.space_group, known.unit_cell):
                cmd += "unit_cell=%s space_group=%d " % (",".join(map(lambda x: "%.3f" % x,
                                                                      known_xs.unit_cell().parameters())),
                                                         known_xs.space_group().type().number())
            elif index_meth == "real_space_grid_search":
                continue  # this method needs a known cell

            log_out.write("Trying indexing.method=%s index_assignment.method=%s\n" % (index_meth, index_assi))
            log_out.flush()
            util.call(cmd, wdir=wdir, stdout=log_out)
            if os.path.isfile(os.path.join(wdir, "experiments.json")):
                index_ok = True
            else:
                for f in ("dials.index.log", "dials.index.debug.log"):
                    util.rotate_file(os.path.join(wdir, f))

    if not index_ok:
        return

    files_for_integration = "experiments.json indexed.pickle"

    if scan_varying:
        util.call("dials.refine experiments.json indexed.pickle scan_varying=true",
                  wdir=wdir, stdout=log_out)
        if os.path.isfile(os.path.join(wdir, "refined.pickle")):
            files_for_integration = "refined_experiments.json refined.pickle"
        else:
            log_out.write("dials.refine failed. using indexed results.\n")

    util.call("dials.integrate %s min_spots.per_degree=10 %s" % (files_for_integration, nproc_str),
              wdir=wdir, stdout=log_out)
    util.call("dials.export integrated.pickle integrated_experiments.json mtz.hklout=integrated.mtz",
              wdir=wdir, stdout=log_out)
    util.call("pointless integrated.mtz hklout pointless.mtz",
              wdir=wdir, stdin="SETTING SYMMETRY-BASED\ntolerance 10\n",
              stdout=open(pointless_log, "w"))
    util.call("dials.export integrated_experiments.json integrated.pickle format=xds_ascii xds_ascii.hklout=DIALS.HKL",
              wdir=wdir, stdout=log_out)
    util.call("aimless hklin pointless.mtz hklout aimless.mtz",
              wdir=wdir, stdin="output UNMERGED\n",
              stdout=open(os.path.join(wdir, "aimless.log"), "w"))

    #job_str += "touch dials_job_finished\n"

    ret = calc_merging_stats(os.path.join(wdir, "aimless_unmerged.mtz"))
    ret["symm"] = get_most_possible_symmetry(wdir)

    pickle.dump(ret, open(os.path.join(wdir, "kamo_dials.pkl"), "w"), -1)
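
# run_dials_sequence() above tries a fixed cascade of DIALS indexing strategies
# (fft3d, then fft1d, then real_space_grid_search, each with "local" then
# "simple" spot assignment) and keeps the first one that yields experiments.json.
# A minimal sketch of that retry pattern; try_index is a hypothetical callable
# standing in for one dials.index run (the real loop also skips
# real_space_grid_search when no cell is known):

def first_successful_indexing(try_index):
    for index_meth in ("fft3d", "fft1d", "real_space_grid_search"):
        for index_assi in ("local", "simple"):
            if try_index(index_meth, index_assi):
                return index_meth, index_assi
    return None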
def xds_sequence(root, params):
    print
    print os.path.relpath(root, params.topdir)

    xparm = os.path.join(root, "XPARM.XDS")
    gxparm = os.path.join(root, "GXPARM.XDS")
    defpix_lp = os.path.join(root, "DEFPIX.LP")
    correct_lp = os.path.join(root, "CORRECT.LP")
    integrate_hkl = os.path.join(root, "INTEGRATE.HKL")
    xac_hkl = os.path.join(root, "XDS_ASCII.HKL")
    integrate_lp = os.path.join(root, "INTEGRATE.LP")
    spot_xds = os.path.join(root, "SPOT.XDS")
    xdsinp = os.path.join(root, "XDS.INP")

    assert os.path.isfile(xdsinp)

    xdsinp_dict = dict(get_xdsinp_keyword(xdsinp))

    decilog = multi_out()
    decilog.register("log", open(os.path.join(root, "decision.log"), "a"), atexit_send_to=None)
    print >>decilog, "xds_sequence started at %s in %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), root)

    if params.show_progress:
        decilog.register("stdout", sys.stdout)

    if params.mode == "initial" and params.resume and os.path.isfile(correct_lp):
        print " Already processed."
        return

    if params.mode == "recycle" and not os.path.isfile(gxparm):
        print "GXPARM.XDS not found. Cannot do recycle."
        return

    if params.fast_delphi and (params.nproc is None or params.nproc > 1):
        delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc)
        print " Setting delphi to ", delphi
        modify_xdsinp(xdsinp, inp_params=[("DELPHI", str(delphi)),
                                          ])

    if params.nproc is not None and params.nproc > 1:
        modify_xdsinp(xdsinp, inp_params=[("MAXIMUM_NUMBER_OF_PROCESSORS", str(params.nproc)),
                                          ])

    if params.mode == "initial":
        # Peak search
        modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT COLSPOT")])
        run_xds(wdir=root, show_progress=params.show_progress)

        if params.auto_frame_exclude_spot_based:
            sx = idxreflp.SpotXds(spot_xds)
            sx.set_xdsinp(xdsinp)
            spots = filter(lambda x: 5 < x[-1] < 30, sx.collected_spots())  # low-res (5 A)
            frame_numbers = numpy.array(map(lambda x: int(x[2])+1, spots))
            data_range = map(int, xdsinp_dict["DATA_RANGE"].split())
            # XXX this assumes SPOT_RANGE equals to DATA_RANGE. Is this guaranteed?
            h = numpy.histogram(frame_numbers, bins=numpy.arange(data_range[0], data_range[1]+2, step=1))
            q14 = numpy.percentile(h[0], [25, 75])
            iqr = q14[1] - q14[0]
            cutoff = max(h[0][h[0] <= iqr*1.5 + q14[1]]) / 5  # magic number
            print "DEBUG:: IQR= %.2f, Q1/4= %s, cutoff= %.2f" % (iqr, q14, cutoff)
            cut_frames = h[1][h[0] < cutoff]
            keep_frames = h[1][h[0] >= cutoff]
            print "DEBUG:: keep_frames=", keep_frames
            print "DEBUG:: cut_frames=", cut_frames

            if len(cut_frames) > 0:
                # Group consecutive cut frames into contiguous ranges
                cut_ranges = [[cut_frames[0], cut_frames[0]], ]
                for fn in cut_frames:
                    if fn - cut_ranges[-1][1] <= 1:
                        cut_ranges[-1][1] = fn
                    else:
                        cut_ranges.append([fn, fn])

                # Edit XDS.INP
                cut_inp_str = "".join(map(lambda x: "EXCLUDE_DATA_RANGE= %6d %6d\n" % tuple(x), cut_ranges))
                open(xdsinp, "a").write("\n" + cut_inp_str)

                # Edit SPOT.XDS
                shutil.copyfile(spot_xds, spot_xds + ".org")
                sx.write(open(spot_xds, "w"), frame_selection=set(keep_frames))

        # Indexing
        modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF")])
        run_xds(wdir=root, show_progress=params.show_progress)
        print  # TODO indexing stats like indexed percentage here.

        if params.tryhard:
            try_indexing_hard(root, params.show_progress, decilog,
                              known_sgnum=params.cell_prior.sgnum,
                              known_cell=params.cell_prior.cell,
                              tol_length=params.cell_prior.tol_length,
                              tol_angle=params.cell_prior.tol_angle)

        if not os.path.isfile(xparm):
            print >>decilog, " Indexing failed."
            return

        if params.cell_prior.check and params.cell_prior.sgnum > 0:
            xsxds = XPARM(xparm).crystal_symmetry()
            xsref = crystal.symmetry(params.cell_prior.cell, params.cell_prior.sgnum)
            cosets = reindex.reindexing_operators(xsref, xsxds,
                                                  params.cell_prior.tol_length,
                                                  params.cell_prior.tol_angle)
            if cosets.double_cosets is None:
                print >>decilog, " Incompatible cell. Indexing failed."
                return

    elif params.mode == "recycle":
        print " Start recycle. original ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
        for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS", "plot_integrate.log"):
            util.rotate_file(os.path.join(root, f), copy=True)
        shutil.copyfile(gxparm + ".1", xparm)
    else:
        raise Exception("Unknown mode (%s)" % params.mode)  # string exceptions are not valid

    # To Integration
    modify_xdsinp(xdsinp, inp_params=[("JOB", "DEFPIX INTEGRATE"),
                                      ("INCLUDE_RESOLUTION_RANGE", "50 0")])
    run_xds(wdir=root, show_progress=params.show_progress)

    if os.path.isfile(integrate_lp):
        xds_plot_integrate.run(integrate_lp, os.path.join(root, "plot_integrate.log"))

    if not os.path.isfile(integrate_hkl):
        print >>decilog, " Integration failed."
        return

    # Make _noscale.HKL if needed
    if params.no_scaling:
        bk_prefix = make_backup(("XDS.INP",), wdir=root, quiet=True)
        xparm_obj = XPARM(xparm)
        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                          ("CORRECTIONS", ""),
                                          ("NBATCH", "1"),
                                          ("MINIMUM_I/SIGMA", "50"),
                                          ("REFINE(CORRECT)", ""),
                                          ("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, xparm_obj.unit_cell))),
                                          ("SPACE_GROUP_NUMBER", "%d" % xparm_obj.spacegroup),
                                          ])
        print >>decilog, " running CORRECT without empirical scaling"
        run_xds(wdir=root, show_progress=params.show_progress)
        for f in xds_files.generated_by_CORRECT + ("XDS.INP",):
            ff = os.path.join(root, f)
            if not os.path.isfile(ff):
                continue
            if ff.endswith(".cbf"):
                os.remove(ff)
            else:
                os.rename(ff, ff + "_noscale")

        revert_files(("XDS.INP",), bk_prefix, wdir=root, quiet=True)

    # Run pointless
    symm_by_integrate = None
    if params.use_pointless:
        worker = Pointless()
        result = worker.run_for_symm(xdsin=integrate_hkl,
                                     logout=os.path.join(root, "pointless_integrate.log"))
        if "symm" in result:
            symm = result["symm"]
            print >>decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info()
            sgnum = symm.space_group_info().type().number()
            cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
            modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                              ("UNIT_CELL_CONSTANTS", cell)])
            symm_by_integrate = symm
        else:
            print >>decilog, " pointless failed."

    # Do Scaling
    modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                      ])

    run_xds(wdir=root, show_progress=params.show_progress)

    if not os.path.isfile(gxparm):
        print >>decilog, " Scaling failed."
        return

    print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)

    ret = calc_merging_stats(os.path.join(root, "XDS_ASCII.HKL"))
    if params.cut_resolution:
        if ret is not None and ret[0] is not None:
            d_min = ret[0]
            modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                              ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
            print >>decilog, " Re-scale at %.2f A" % d_min
            os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP"))
            os.rename(os.path.join(root, "XDS_ASCII.HKL"), os.path.join(root, "XDS_ASCII_fullres.HKL"))
            run_xds(wdir=root, show_progress=params.show_progress)
            print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
            print >>decilog, " (Original files are saved as *_fullres.*)"
        else:
            print >>decilog, "error: Can't decide resolution."

    last_ISa = correctlp.get_ISa(correct_lp, check_valid=True)

    # Run pointless and (if result is different from INTEGRATE) re-scale.
    if params.use_pointless:
        worker = Pointless()
        result = worker.run_for_symm(xdsin=xac_hkl,
                                     logout=os.path.join(root, "pointless_correct.log"))
        if "symm" in result:
            symm = result["symm"]
            need_rescale = False

            if symm_by_integrate is not None:
                if not xtal.is_same_laue_symmetry(symm_by_integrate.space_group(), symm.space_group()):
                    print >>decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % (symm.space_group_info(), symm_by_integrate.space_group_info())
                    need_rescale = True
            else:
                print >>decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info()
                need_rescale = True

            if need_rescale:
                # make backup, and do correct and compare ISa
                # if ISa got worse, revert the result.
                backup_needed = ("XDS.INP", "XDS_ASCII_fullres.HKL", "CORRECT_fullres.LP",
                                 "merging_stats.pkl", "merging_stats.log")
                backup_needed += xds_files.generated_by_CORRECT
                bk_prefix = make_backup(backup_needed, wdir=root, quiet=True)

                sgnum = symm.space_group_info().type().number()
                cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
                modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                  ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                                  ("UNIT_CELL_CONSTANTS", cell),
                                                  ("INCLUDE_RESOLUTION_RANGE", "50 0")])

                run_xds(wdir=root, show_progress=params.show_progress)

                ret = calc_merging_stats(os.path.join(root, "XDS_ASCII.HKL"))

                if params.cut_resolution:
                    if ret is not None and ret[0] is not None:
                        d_min = ret[0]
                        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                          ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
                        print >>decilog, " Re-scale at %.2f A" % d_min
                        os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP"))
                        os.rename(os.path.join(root, "XDS_ASCII.HKL"), os.path.join(root, "XDS_ASCII_fullres.HKL"))
                        run_xds(wdir=root, show_progress=params.show_progress)
                        print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
                        print >>decilog, " (Original files are saved as *_fullres.*)"
                    else:
                        print >>decilog, "error: Can't decide resolution."
                        for f in ("CORRECT_fullres.LP", "XDS_ASCII_fullres.HKL"):
                            if os.path.isfile(os.path.join(root, f)):
                                print >>decilog, "removing", f
                                os.remove(os.path.join(root, f))

                ISa = correctlp.get_ISa(correct_lp, check_valid=True)

                if ISa >= last_ISa or last_ISa != last_ISa:  # if improved or last_ISa is nan
                    print >>decilog, "ISa improved= %.2f" % ISa
                    remove_backups(backup_needed, bk_prefix, wdir=root)
                else:
                    print >>decilog, "ISa got worse= %.2f" % ISa
                    for f in backup_needed:
                        if os.path.isfile(os.path.join(root, f)):
                            os.remove(os.path.join(root, f))

                    revert_files(backup_needed, bk_prefix, wdir=root, quiet=True)

    run_xdsstat(wdir=root)
    print
    if params.make_report:
        html_report.make_individual_report(root, root)

    print >>decilog, "xds_sequence finished at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")
    decilog.close()
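
# The auto_frame_exclude_spot_based block above histograms low-resolution spots
# per frame and excludes frames whose count falls below a cutoff derived from
# Tukey's upper fence (the largest count still <= Q3 + 1.5*IQR, divided by the
# "magic number" 5). A minimal numpy sketch of the same decision on a plain
# array of per-frame spot counts (an illustrative helper, not part of the pipeline):

import numpy

def frames_to_exclude(counts_per_frame):
    counts = numpy.asarray(counts_per_frame)
    q1, q3 = numpy.percentile(counts, [25, 75])
    iqr = q3 - q1
    cutoff = max(counts[counts <= q3 + 1.5*iqr]) / 5.  # largest non-outlier count / 5
    return numpy.where(counts < cutoff)[0] + 1  # 1-based frame numbers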
def run_cycle(self, xds_ascii_files, reference_idx=None):
    if len(xds_ascii_files) == 0:
        print >>self.out, "Error: no files given."
        return

    xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
    xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

    # Get averaged cell for scaling
    sg, cell, lcv, alcv = self.average_cells(xds_ascii_files)
    self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv, alcv)

    # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
    inp_out = open(xscale_inp, "w")
    inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
    inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
    inp_out.write(self.xscale_inp_head)

    for i, xds_ascii in enumerate(xds_ascii_files):
        f = self.altfile.get(xds_ascii, xds_ascii)
        tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x: len(x))
        refstr = "*" if i == reference_idx else " "
        inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
        if len(self.xscale_params.corrections) != 3:
            inp_out.write(" CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
        if self.xscale_params.frames_per_batch is not None:
            frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
            nframes = frame_range[1] - frame_range[0]
            nbatch = int(numpy.ceil(nframes / self.xscale_params.frames_per_batch))
            print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
            inp_out.write(" NBATCH= %d\n" % nbatch)

    inp_out.close()

    print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
    try:
        xscale.run_xscale(xscale_inp, cbf_to_dat=True,
                          use_tmpdir_if_available=self.xscale_params.use_tmpdir_if_available)
    except:
        print >>self.out, traceback.format_exc()

    xscale_log = open(xscale_lp).read()
    if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
        print >>self.out, "DEBUG:: Need to choose files."

        # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections.
        # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves?
        # Older versions just print correlation table and stop.
        if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
            G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
            #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
            cliques = [c for c in nx.find_cliques(G)]
            cliques.sort(key=lambda x: len(x))

            if self._counter == 1:
                max_clique = cliques[-1]
            else:
                idx_prevfile = 1 if self.reference_file else 0
                max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1]  # xscale.hkl must be included!

            if self.reference_file:
                max_clique = [0, ] + filter(lambda x: x != 0, max_clique)

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            try_later = map(lambda i: xds_ascii_files[i],
                            filter(lambda x: x not in max_clique, G.nodes()))

            print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique), len(try_later))
            print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(G.nodes()))
            for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

            assert len(try_later) <= 0  # Never be the case with newer xscale!! (if the case, check_remove_list() should be modified to skip_num+=1)
            if len(try_later) > 0:
                print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                if self.reference_file:
                    next_files = [self.reference_file, ] + next_files
                self.workdir = self.request_next_workdir()
                self.run_cycle(next_files)
            return
        else:
            bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
            print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            # XXX Actually, not all datasets need to be thrown.. some of them are useful..
            for i in bad_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i],
                               filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))
            return
    elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
        print >>self.out, "DEBUG:: Need to discard useless data."
        unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]]  # filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
        if len(unuseful_data) == 0:
            print >>self.out, "I don't know how to fix it.."
            return
        remove_idxes = map(lambda x: x[0]-1, unuseful_data)
        remove_idxes = self.check_remove_list(remove_idxes)
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        for i in remove_idxes:
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = "useless"

        for f in "XSCALE.INP", "XSCALE.LP":
            util.rotate_file(os.path.join(self.workdir, f))
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        return
    elif "INACCURATE SCALING FACTORS." in xscale_log:
        # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
        print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
    elif "!!! ERROR !!!" in xscale_log:
        print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
        return

    # Re-scale by changing reference
    rescale_for = None
    if len(self.reject_method) == 0:
        rescale_for = self.reference_choice  # may be None
    elif reference_idx is None:
        rescale_for = "bmed"

    if rescale_for is not None and len(xds_ascii_files) > 1:
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling with %s" % rescale_for
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(xds_ascii_files, reference_idx=ref_num)

    if len(self.reject_method) == 0:
        return

    # Remove bad data
    remove_idxes = []
    remove_reasons = {}

    if self.reject_method[0] == "framecc":
        print >>self.out, "Rejections based on frame CC"
        from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

        # list of [frame, n_all, n_common, cc] in the same order
        framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                               output_dir=self.workdir,
                                               nproc=self.nproc).values()
        if self.reject_params.framecc.method == "tukey":
            ccs = numpy.array(map(lambda x: x[3], reduce(lambda x, y: x+y, framecc)))
            ccs = ccs[ccs == ccs]  # Remove nan
            q25, q75 = numpy.percentile(ccs, [25, 75])
            cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
            print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
        else:
            cc_cutoff = self.reject_params.framecc.abs_cutoff
            print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

        for i, cclist in enumerate(framecc):
            useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
            if len(useframes) == 0:
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            f = xds_ascii_files[i]
            xac = XDS_ASCII(f)
            if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                continue  # All useful frames.

            sel = xac.iframe == useframes[0]
            for x in useframes[1:]:
                sel |= xac.iframe == x
            if sum(sel) < 10:  # XXX care I/sigma
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str, useframes)),
                                                                           min(xac.iframe), max(xac.iframe), f)

            newf = self.request_file_modify(f)
            xac.write_selected(sel, newf)

        self.reject_method.pop(0)  # Perform only once

    elif self.reject_method[0] == "lpstats":
        if "bfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
            Bs = numpy.array(map(lambda x: x[1], xscalelp.get_k_b(xscale_lp)))
            q25, q75 = numpy.percentile(Bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(Bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_B")
                    count += 1

            print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.b" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
            bs = numpy.array(map(lambda x: x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.b")
                    count += 1

            print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.ab" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
            vals = numpy.array(map(lambda x: x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, ab in enumerate(vals):
                if ab < lowlim or ab > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.ab")
                    count += 1

            print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "rfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
            rstats = xscalelp.get_rfactors_for_each(xscale_lp)
            vals = numpy.array(map(lambda x: rstats[x][-1][1], rstats))  # Read total R-factor
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, v in enumerate(vals):
                if v < lowlim or v > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_R")
                    count += 1

            print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "pairwise_cc" in self.reject_params.lpstats.stats:
            corrs = xscalelp.get_pairwise_correlations(xscale_lp)
            if self.reject_params.lpstats.pwcc.method == "tukey":
                q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                iqr = q75 - q25
                lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
            else:
                lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

            bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
            idx_bad = {}
            for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                idx_bad[i] = idx_bad.get(i, 0) + 1
                idx_bad[j] = idx_bad.get(j, 0) + 1

            idx_bad = idx_bad.items()
            idx_bad.sort(key=lambda x: x[1])
            count = 0
            # Greedily remove the dataset involved in the most bad pairs until none remain
            for idx, badcount in reversed(idx_bad):
                remove_idxes.append(idx-1)
                remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                if len(bad_corrs) == 0:
                    break
                fun_key = lambda x: x[3]
                print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx,
                                                                                               min(bad_corrs, key=fun_key)[3],
                                                                                               max(bad_corrs, key=fun_key)[3],
                                                                                               len(bad_corrs))
                count += 1

            print >>self.out, " %4d pairwise CC outliers removed" % count

        self.reject_method.pop(0)  # Perform only once

    elif self.reject_method[0] == "delta_cc1/2":
        print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = xscalelp.read_stats_table(xscale_lp)
        i_stat = -1 if self.delta_cchalf_bin == "total" else -2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name->idx table
        remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

        # For consistent resolution limit
        inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
        count = 0
        for i in xrange(len(xds_ascii_files)-1):  # if only one file, cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

            cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                         inpfiles=remaining_files.keys(),
                                                         stat_bin=self.delta_cchalf_bin,
                                                         nproc=self.nproc,
                                                         nproc_each=self.nproc_each,
                                                         batchjobs=self.batchjobs)

            rem_idx, cc_i, nuniq_i = cchalf_list[0]  # First (largest) is worst one to remove.
            rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]

            # Decision making by CC1/2
            print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org,
                                                                                      cc_i, nuniq_i,
                                                                                      prev_cchalf, prev_nuniq)
            if cc_i*nuniq_i <= prev_cchalf*prev_nuniq:
                break
            print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
            del remaining_files[remaining_files.keys()[rem_idx]]  # remove file from table
            count += 1

        print >>self.out, " %4d removed by DeltaCC1/2 method" % count

        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
        else:
            self.reject_method.pop(0)
    else:
        print >>self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method[0]

    # Remove duplicates
    remove_idxes = list(set(remove_idxes))
    remove_idxes = self.check_remove_list(remove_idxes)
    if len(remove_idxes) > 0:
        print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
        for i in sorted(remove_idxes):
            print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

    # Next run
    keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
    if len(self.reject_method) > 0 or len(remove_idxes) > 0:
        self.workdir = self.request_next_workdir()
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
    elif self.reference_choice is not None and len(keep_idxes) > 1:
        # Just re-scale with B reference
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling2 with %s" % self.reference_choice
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
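
# The "lpstats" rejections above (B-factor, error model b and a*b, R-factor)
# all apply the same rule: flag dataset i when its statistic falls outside
# Tukey's fences [Q1 - c*IQR, Q3 + c*IQR], with c = iqr_coeff. A minimal
# sketch of that shared rule (an illustrative helper):

import numpy

def tukey_outlier_idxes(values, iqr_coeff=1.5):
    vals = numpy.asarray(values, dtype=float)
    q25, q75 = numpy.percentile(vals, [25, 75])
    iqr = q75 - q25
    lowlim, highlim = q25 - iqr_coeff*iqr, q75 + iqr_coeff*iqr
    return [i for i, v in enumerate(vals) if v < lowlim or v > highlim]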
def run(params, xac_files):
    if len(xac_files) == 0:
        print "No XDS_ASCII.HKL files provided."
        return

    # Parse
    dmin_dict = {}
    if params.dmin_lst:
        for l in open(params.dmin_lst):
            sp = l.split()
            if len(sp) != 2: continue
            f, dmin = sp
            dmin_dict[f] = dmin

    xscale_inp_head = "!MINIMUM_I/SIGMA= 3\n\n"
    if params.wfac1 is not None:
        xscale_inp_head += "WFAC1= %.3f\n" % params.wfac1

    if params.nproc:
        xscale_inp_head += "MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % params.nproc

    infos = {}
    d_max, d_min = 0, 100
    cells = []
    for xds_ascii in xac_files:
        info = get_xac_info(xds_ascii, get_nframes=params.frames_per_batch is not None)
        if xds_ascii in dmin_dict:
            dmax, dmin = info["resol_range"].split()
            info["resol_range_user"] = "%s %s" % (dmax, dmin_dict[xds_ascii])

        infos[xds_ascii] = info
        resrng = map(float, info["resol_range"].split())
        d_max = max(d_max, resrng[0])
        d_min = min(d_min, resrng[1])
        cells.append(map(float, info["cell"].split()))

    if params.d_min is not None:
        d_min = max(params.d_min, d_min)

    if params.cell == "average":
        cell_sum = reduce(lambda x, y: map(lambda a: a[0]+a[1], zip(x, y)), cells)
        cell_mean = map(lambda x: x/float(len(cells)), cell_sum)

        if params.sgnum is not None:
            sgnum = str(params.sgnum)
        else:
            sgnum = infos[xac_files[0]]["spgr_num"]

        xscale_inp_head += " SPACE_GROUP_NUMBER= %s\n" % sgnum
        xscale_inp_head += " UNIT_CELL_CONSTANTS= %s\n" % " ".join(map(lambda x: "%.3f" % x, cell_mean))

    xscale_inp_head += make_shells(d_max, d_min, params.nbins) + "\n"

    xscale_inp_head += " OUTPUT_FILE= %s\n" % params.output
    xscale_inp_head += " FRIEDEL'S_LAW= %s\n\n" % ("FALSE" if params.anomalous else "TRUE")

    prep_xscale_inp(params.workdir, xscale_inp_head, xac_files, infos,
                    params.frames_per_batch, params.corrections)
    xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                      cbf_to_dat=params.cbf_to_dat,
                      use_tmpdir_if_available=params.use_tmpdir_if_available)

    if params.reference:
        print "Choosing reference data (reference=%s)" % params.reference
        ref_idx = xscale.decide_scaling_reference_based_on_bfactor(os.path.join(params.workdir, "XSCALE.LP"),
                                                                   params.reference, return_as="index")
        if ref_idx != 0:
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(params.workdir, f))
            prep_xscale_inp(params.workdir, xscale_inp_head, xac_files, infos,
                            params.frames_per_batch, params.corrections, ref_idx=ref_idx)
            xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                              cbf_to_dat=params.cbf_to_dat,
                              use_tmpdir_if_available=params.use_tmpdir_if_available)
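
# For reference, the header assembled above yields an XSCALE.INP beginning
# roughly as follows (every value below is illustrative; the shell line depends
# on what make_shells() emits, and INPUT_FILE= lines are appended per dataset
# by prep_xscale_inp()):
#
#   !MINIMUM_I/SIGMA= 3
#   MAXIMUM_NUMBER_OF_PROCESSORS= 8
#    SPACE_GROUP_NUMBER= 19
#    UNIT_CELL_CONSTANTS= 61.234 88.456 99.789 90.000 90.000 90.000
#    <resolution shells from make_shells(d_max, d_min, params.nbins)>
#    OUTPUT_FILE= xscale.hkl
#    FRIEDEL'S_LAW= TRUE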
def xds_sequence(root, params):
    print
    print os.path.relpath(root, params.topdir)

    init_lp = os.path.join(root, "INIT.LP")
    xparm = os.path.join(root, "XPARM.XDS")
    gxparm = os.path.join(root, "GXPARM.XDS")
    defpix_lp = os.path.join(root, "DEFPIX.LP")
    correct_lp = os.path.join(root, "CORRECT.LP")
    integrate_hkl = os.path.join(root, "INTEGRATE.HKL")
    xac_hkl = os.path.join(root, "XDS_ASCII.HKL")
    integrate_lp = os.path.join(root, "INTEGRATE.LP")
    spot_xds = os.path.join(root, "SPOT.XDS")
    xdsinp = os.path.join(root, "XDS.INP")

    assert os.path.isfile(xdsinp)
    if params.cell_prior.force:
        assert params.cell_prior.check

    xdsinp_dict = dict(get_xdsinp_keyword(xdsinp))

    if params.cell_prior.sgnum > 0:
        xs_prior = crystal.symmetry(params.cell_prior.cell, params.cell_prior.sgnum)
    else:
        xs_prior = None

    decilog = multi_out()
    decilog.register("log", open(os.path.join(root, "decision.log"), "a"), atexit_send_to=None)
    try:
        print >>decilog, "xds_sequence started at %s in %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), root)

        if not kamo_test_installation.tst_xds():
            print >>decilog, "XDS is not installed or expired!!"
            return

        if params.show_progress:
            decilog.register("stdout", sys.stdout)

        if params.mode == "initial" and params.resume and os.path.isfile(correct_lp):
            print >>decilog, " Already processed."
            return

        if params.mode == "recycle" and not os.path.isfile(gxparm):
            print >>decilog, "GXPARM.XDS not found. Cannot do recycle."
            return

        if params.fast_delphi and (params.nproc is None or params.nproc > 1):
            delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc)
            print >>decilog, " Setting delphi to ", delphi
            modify_xdsinp(xdsinp, inp_params=[("DELPHI", str(delphi)),
                                              ])

        if params.nproc is not None and params.nproc > 1:
            modify_xdsinp(xdsinp, inp_params=[("MAXIMUM_NUMBER_OF_PROCESSORS", str(params.nproc)),
                                              ])

        if params.mode == "initial":
            modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT")])
            run_xds(wdir=root, show_progress=params.show_progress)
            initlp = InitLp(init_lp)
            first_bad = initlp.check_bad_first_frames()
            if first_bad:
                print >>decilog, " first frames look bad (too weak) exposure:", first_bad
                new_data_range = map(int, dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split())
                new_data_range[0] = first_bad[-1] + 1
                print >>decilog, " changing DATA_RANGE= to", new_data_range
                modify_xdsinp(xdsinp, inp_params=[("JOB", "INIT"),
                                                  ("DATA_RANGE", "%d %d" % tuple(new_data_range))])
                for f in xds_files.generated_by_INIT:
                    util.rotate_file(os.path.join(root, f), copy=False)
                run_xds(wdir=root, show_progress=params.show_progress)

            # Peak search
            modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")])
            run_xds(wdir=root, show_progress=params.show_progress)

            if params.auto_frame_exclude_spot_based:
                sx = idxreflp.SpotXds(spot_xds)
                sx.set_xdsinp(xdsinp)
                spots = filter(lambda x: 5 < x[-1] < 30, sx.collected_spots())  # low-res (5 A)
                frame_numbers = numpy.array(map(lambda x: int(x[2])+1, spots))
                data_range = map(int, dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split())
                # XXX this assumes SPOT_RANGE equals to DATA_RANGE. Is this guaranteed?
                h = numpy.histogram(frame_numbers, bins=numpy.arange(data_range[0], data_range[1]+2, step=1))
                q14 = numpy.percentile(h[0], [25, 75])
                iqr = q14[1] - q14[0]
                cutoff = max(h[0][h[0] <= iqr*1.5 + q14[1]]) / 5  # magic number
                print >>decilog, "DEBUG:: IQR= %.2f, Q1/4= %s, cutoff= %.2f" % (iqr, q14, cutoff)
                cut_frames = h[1][h[0] < cutoff]
                keep_frames = h[1][h[0] >= cutoff]
                print >>decilog, "DEBUG:: keep_frames=", keep_frames
                print >>decilog, "DEBUG:: cut_frames=", cut_frames

                if len(cut_frames) > 0:
                    # Group consecutive cut frames into contiguous ranges
                    cut_ranges = [[cut_frames[0], cut_frames[0]], ]
                    for fn in cut_frames:
                        if fn - cut_ranges[-1][1] <= 1:
                            cut_ranges[-1][1] = fn
                        else:
                            cut_ranges.append([fn, fn])

                    # Edit XDS.INP
                    cut_inp_str = "".join(map(lambda x: "EXCLUDE_DATA_RANGE= %6d %6d\n" % tuple(x), cut_ranges))
                    open(xdsinp, "a").write("\n" + cut_inp_str)

                    # Edit SPOT.XDS
                    shutil.copyfile(spot_xds, spot_xds + ".org")
                    sx.write(open(spot_xds, "w"), frame_selection=set(keep_frames))

            # Indexing
            if params.cell_prior.method == "use_first":
                modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF"),
                                                  ("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, params.cell_prior.cell))),
                                                  ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum),
                                                  ])
            else:
                modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF")])

            run_xds(wdir=root, show_progress=params.show_progress)
            print >>decilog, ""  # TODO indexing stats like indexed percentage here.

            if params.tryhard:
                try_indexing_hard(root, params.show_progress, decilog,
                                  known_sgnum=params.cell_prior.sgnum,
                                  known_cell=params.cell_prior.cell,
                                  tol_length=params.cell_prior.tol_length,
                                  tol_angle=params.cell_prior.tol_angle)

            if not os.path.isfile(xparm):
                print >>decilog, " Indexing failed."
                return

            if params.cell_prior.sgnum > 0:
                # Check anyway
                xsxds = XPARM(xparm).crystal_symmetry()
                cosets = reindex.reindexing_operators(xs_prior, xsxds,
                                                      params.cell_prior.tol_length,
                                                      params.cell_prior.tol_angle)
                if cosets.double_cosets is None:
                    if params.cell_prior.check:
                        print >>decilog, " Incompatible cell. Indexing failed."
                        return
                    else:
                        print >>decilog, " Warning: Incompatible cell."

                elif params.cell_prior.method == "symm_constraint_only":
                    cell = xsxds.unit_cell().change_basis(cosets.combined_cb_ops()[0])
                    print >>decilog, " Trying symmetry-constrained cell parameter:", cell
                    modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF"),
                                                      ("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, cell.parameters()))),
                                                      ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum),
                                                      ])
                    for f in xds_files.generated_by_IDXREF:
                        util.rotate_file(os.path.join(root, f), copy=(f == "SPOT.XDS"))

                    run_xds(wdir=root, show_progress=params.show_progress)

                    if not os.path.isfile(xparm):
                        print >>decilog, " Indexing failed."
                        return

                    # Check again
                    xsxds = XPARM(xparm).crystal_symmetry()
                    if not xsxds.unit_cell().is_similar_to(xs_prior.unit_cell(),
                                                           params.cell_prior.tol_length,
                                                           params.cell_prior.tol_angle):
                        print >>decilog, " Resulted in different cell. Indexing failed."
                        return

        elif params.mode == "recycle":
            print >>decilog, " Start recycle. original ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
            for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS", "plot_integrate.log"):
                util.rotate_file(os.path.join(root, f), copy=True)
            shutil.copyfile(gxparm + ".1", xparm)
        else:
            raise Exception("Unknown mode (%s)" % params.mode)  # string exceptions are not valid

        # To Integration
        modify_xdsinp(xdsinp, inp_params=[("JOB", "DEFPIX INTEGRATE"),
                                          ("INCLUDE_RESOLUTION_RANGE", "50 0")])
        run_xds(wdir=root, show_progress=params.show_progress)

        if os.path.isfile(integrate_lp):
            xds_plot_integrate.run(integrate_lp, os.path.join(root, "plot_integrate.log"))

        if not os.path.isfile(integrate_hkl):
            print >>decilog, " Integration failed."
            return

        # Make _noscale.HKL if needed
        if params.no_scaling:
            bk_prefix = make_backup(("XDS.INP",), wdir=root, quiet=True)
            xparm_obj = XPARM(xparm)
            modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                              ("CORRECTIONS", ""),
                                              ("NBATCH", "1"),
                                              ("MINIMUM_I/SIGMA", "50"),
                                              ("REFINE(CORRECT)", ""),
                                              ("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, xparm_obj.unit_cell))),
                                              ("SPACE_GROUP_NUMBER", "%d" % xparm_obj.spacegroup),
                                              ])
            print >>decilog, " running CORRECT without empirical scaling"
            run_xds(wdir=root, show_progress=params.show_progress)
            for f in xds_files.generated_by_CORRECT + ("XDS.INP",):
                ff = os.path.join(root, f)
                if not os.path.isfile(ff):
                    continue
                if ff.endswith(".cbf"):
                    os.remove(ff)
                else:
                    os.rename(ff, ff + "_noscale")

            revert_files(("XDS.INP",), bk_prefix, wdir=root, quiet=True)

        # Run pointless
        pointless_integrate = {}
        if params.use_pointless:
            worker = Pointless()
            pointless_integrate = worker.run_for_symm(xdsin=integrate_hkl,
                                                      logout=os.path.join(root, "pointless_integrate.log"))
            if "symm" in pointless_integrate:
                symm = pointless_integrate["symm"]
                print >>decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info()
                if xs_prior:
                    if xtal.is_same_space_group_ignoring_enantiomorph(symm.space_group(), xs_prior.space_group()):
                        print >>decilog, " which is consistent with given symmetry."
                    elif xtal.is_same_laue_symmetry(symm.space_group(), xs_prior.space_group()):
                        print >>decilog, " which has consistent Laue symmetry with given symmetry."
                    else:
                        print >>decilog, " which is inconsistent with given symmetry."

                sgnum = symm.space_group_info().type().number()
                cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
                modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                                  ("UNIT_CELL_CONSTANTS", cell)])
            else:
                print >>decilog, " pointless failed."

        flag_do_not_change_symm = False

        if xs_prior and params.cell_prior.force:
            modify_xdsinp(xdsinp, inp_params=[("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, params.cell_prior.cell))),
                                              ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum)])
            flag_do_not_change_symm = True
        elif params.cell_prior.method == "correct_only":
            xsxds = XPARM(xparm).crystal_symmetry()
            cosets = reindex.reindexing_operators(xs_prior, xsxds,
                                                  params.cell_prior.tol_length,
                                                  params.cell_prior.tol_angle)
            if cosets.double_cosets is not None:
                cell = xsxds.unit_cell().change_basis(cosets.combined_cb_ops()[0])
                print >>decilog, " Using given symmetry in CORRECT with symmetry constraints:", cell
                modify_xdsinp(xdsinp, inp_params=[("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, cell.parameters()))),
                                                  ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum),
                                                  ])
                flag_do_not_change_symm = True
            else:
                print >>decilog, " Tried to use given symmetry in CORRECT, but cell in integration is incompatible."

        # Do Scaling
        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                          ])

        run_xds(wdir=root, show_progress=params.show_progress)

        if not os.path.isfile(xac_hkl):
            print >>decilog, " CORRECT failed."
            return

        if not os.path.isfile(gxparm):
            print >>decilog, " Refinement in CORRECT failed."

        print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)

        ret = calc_merging_stats(xac_hkl)
        if params.cut_resolution:
            if ret is not None and ret[0] is not None:
                d_min = ret[0]
                modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                  ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
                print >>decilog, " Re-scale at %.2f A" % d_min
                os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP"))
                os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL"))
                run_xds(wdir=root, show_progress=params.show_progress)
                print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
                print >>decilog, " (Original files are saved as *_fullres.*)"
            else:
                print >>decilog, "error: Can't decide resolution."

        last_ISa = correctlp.get_ISa(correct_lp, check_valid=True)

        # Run pointless and (if result is different from INTEGRATE) re-scale.
        if params.use_pointless:
            worker = Pointless()
            pointless_correct = worker.run_for_symm(xdsin=xac_hkl,
                                                    logout=os.path.join(root, "pointless_correct.log"))
            pointless_best_symm = None
            if "symm" in pointless_correct:
                symm = pointless_correct["symm"]
                need_rescale = False

                if pointless_integrate.get("symm"):
                    symm_by_integrate = pointless_integrate["symm"]

                    if not xtal.is_same_laue_symmetry(symm_by_integrate.space_group(), symm.space_group()):
                        print >>decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % (symm.space_group_info(), symm_by_integrate.space_group_info())
                        prob_integrate = pointless_integrate.get("laue_prob", float("nan"))
                        prob_correct = pointless_correct.get("laue_prob", float("nan"))

                        print >>decilog, " Prob(%s |INTEGRATE), Prob(%s |CORRECT) = %.4f, %.4f." % (symm_by_integrate.space_group_info(),
                                                                                                    symm.space_group_info(),
                                                                                                    prob_integrate, prob_correct)
                        if prob_correct > prob_integrate:
                            need_rescale = True
                            pointless_best_symm = symm
                        else:
                            pointless_best_symm = symm_by_integrate
                else:
                    need_rescale = True
                    pointless_best_symm = symm
                    print >>decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info()
                    if xs_prior:
                        if xtal.is_same_space_group_ignoring_enantiomorph(symm.space_group(), xs_prior.space_group()):
                            print >>decilog, " which is consistent with given symmetry."
                        elif xtal.is_same_laue_symmetry(symm.space_group(), xs_prior.space_group()):
                            print >>decilog, " which has consistent Laue symmetry with given symmetry."
                        else:
                            print >>decilog, " which is inconsistent with given symmetry."

                if need_rescale and not flag_do_not_change_symm:
                    sgnum = symm.space_group_info().type().number()
                    cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
                    modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                      ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                                      ("UNIT_CELL_CONSTANTS", cell),
                                                      ("INCLUDE_RESOLUTION_RANGE", "50 0")])

                    run_xds(wdir=root, show_progress=params.show_progress)

                    ret = calc_merging_stats(xac_hkl)

                    if params.cut_resolution:
                        if ret is not None and ret[0] is not None:
                            d_min = ret[0]
                            modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                              ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
                            print >>decilog, " Re-scale at %.2f A" % d_min
                            os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP"))
                            os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL"))
                            run_xds(wdir=root, show_progress=params.show_progress)
                            print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
                            print >>decilog, " (Original files are saved as *_fullres.*)"
                        else:
                            print >>decilog, "error: Can't decide resolution."
                            for f in ("CORRECT_fullres.LP", "XDS_ASCII_fullres.HKL"):
                                if os.path.isfile(os.path.join(root, f)):
                                    print >>decilog, "removing", f
                                    os.remove(os.path.join(root, f))

                    ISa = correctlp.get_ISa(correct_lp, check_valid=True)

                    if ISa >= last_ISa or last_ISa != last_ISa:  # if improved or last_ISa is nan
                        print >>decilog, "ISa improved= %.2f" % ISa
                    else:
                        print >>decilog, "ISa got worse= %.2f" % ISa

            if pointless_best_symm:
                xac_symm = XDS_ASCII(xac_hkl, read_data=False).symm
                if not xtal.is_same_space_group_ignoring_enantiomorph(xac_symm.space_group(), pointless_best_symm.space_group()):
                    if xtal.is_same_laue_symmetry(xac_symm.space_group(), pointless_best_symm.space_group()):
                        tmp = "same Laue symmetry"
                    else:
                        tmp = "different Laue symmetry"
                    print >>decilog, "WARNING: symmetry in scaling is different from Pointless result (%s)." % tmp

        run_xdsstat(wdir=root)
        print
        if params.make_report:
            html_report.make_individual_report(root, root)
    except:
        print >>decilog, traceback.format_exc()
    finally:
        print >>decilog, "\nxds_sequence finished at %s" % time.strftime("%Y-%m-%d %H:%M:%S")
        decilog.close()
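
# Note the idiom "last_ISa != last_ISa" used in both xds_sequence() variants:
# IEEE NaN is the only value unequal to itself, so the re-scaled result is kept
# either when ISa improved or when the previous ISa could not be parsed (nan).
# An equivalent, more explicit form:

import math

def accept_new_isa(new_isa, last_isa):
    return new_isa >= last_isa or math.isnan(last_isa)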
def try_indexing_hard(wdir, show_progress, decilog,
                      known_sgnum=None, known_cell=None, tol_length=None, tol_angle=None):
    idxref_lp = os.path.join(wdir, "IDXREF.LP")
    xdsinp = os.path.join(wdir, "XDS.INP")

    lp_org = idxreflp.IdxrefLp(idxref_lp)

    if lp_org.is_cell_maybe_half():
        backup_needed = ("XDS.INP", ) + xds_files.generated_by_IDXREF
        print >>decilog, " !! Cell may be halved. Trying doubled cell."
        bk_prefix = make_backup(backup_needed, wdir=wdir, quiet=True)

        cell = lp_org.deduce_correct_cell_based_on_integerness()
        cell = " ".join(map(lambda x: "%.2f" % x, cell.parameters()))
        modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF"),
                                          ("SPACE_GROUP_NUMBER", "1"),
                                          ("UNIT_CELL_CONSTANTS", cell)])
        for f in xds_files.generated_by_IDXREF:
            util.rotate_file(os.path.join(wdir, f), copy=(f == "SPOT.XDS"))
        run_xds(wdir=wdir, show_progress=show_progress)

        if idxreflp.IdxrefLp(idxref_lp).is_cell_maybe_half():
            revert_files(backup_needed, bk_prefix, wdir=wdir, quiet=True)

            print >>decilog, " .. not solved. Next, try decreasing SEPMIN= and CLUSTER_RADIUS=."
            bk_prefix = make_backup(backup_needed, wdir=wdir, quiet=True)

            modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF"),
                                              ("SEPMIN", "4"),
                                              ("CLUSTER_RADIUS", "2")])
            for f in xds_files.generated_by_IDXREF:
                util.rotate_file(os.path.join(wdir, f), copy=(f == "SPOT.XDS"))
            run_xds(wdir=wdir, show_progress=show_progress)

            if idxreflp.IdxrefLp(idxref_lp).is_cell_maybe_half():
                print >>decilog, " .. not solved. Give up."
                revert_files(backup_needed, bk_prefix, wdir=wdir, quiet=True)
        else:
            print >>decilog, " Now OK."
            remove_backups(backup_needed, bk_prefix, wdir=wdir)
            modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "0"),
                                              ])

    # If Cell hint exists, try to use it..
    if known_sgnum > 0:
        flag_try_cell_hint = False
        xparm = os.path.join(wdir, "XPARM.XDS")
        if not os.path.isfile(xparm):
            flag_try_cell_hint = True
        else:
            xsxds = XPARM(xparm).crystal_symmetry()
            xsref = crystal.symmetry(known_cell, known_sgnum)
            cosets = reindex.reindexing_operators(xsref, xsxds, tol_length, tol_angle)
            if cosets.double_cosets is None:
                flag_try_cell_hint = True

        if flag_try_cell_hint:
            print >>decilog, " Worth trying to use prior cell for indexing."
            modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF"),
                                              ("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, known_cell))),
                                              ("SPACE_GROUP_NUMBER", "%d" % known_sgnum),
                                              ])
            for f in xds_files.generated_by_IDXREF:
                util.rotate_file(os.path.join(wdir, f), copy=(f == "SPOT.XDS"))
            run_xds(wdir=wdir, show_progress=False)
            modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "0"),
                                              ])
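
# try_indexing_hard() falls back to the prior cell only when the indexed cell
# cannot be reindexed onto the prior symmetry at all. A minimal sketch of that
# compatibility test, reusing the same XPARM/cctbx calls as the functions above
# (tolerances are in Angstrom and degrees):

def cell_compatible(xparm_file, known_cell, known_sgnum, tol_length, tol_angle):
    xsxds = XPARM(xparm_file).crystal_symmetry()
    xsref = crystal.symmetry(known_cell, known_sgnum)
    cosets = reindex.reindexing_operators(xsref, xsxds, tol_length, tol_angle)
    return cosets.double_cosets is not None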
def find_spots(img_file, params):
    """
    Use XDS to locate spots. If params.xds.do_defpix is true, DEFPIX will be run.
    For DEFPIX, dummy XPARM.XDS is needed. Thanks to DEFPIX, we can mask beam stop shadow
    and remove areas outside the resolution range.
    If false, we need to set TRUSTED_REGION to exclude regions outside resolution range,
    but it is independent of beam center. Maybe we should remove spots outside the
    resolution range after XDS run?
    """

    # Test if ramdisk available
    if os.path.isdir("/dev/shm"):
        work_dir = tempfile.mkdtemp(prefix="shika_x_"+os.path.splitext(os.path.basename(img_file))[0],
                                    dir="/dev/shm")
    else:
        work_dir = os.path.join(params.work_dir, "xds_"+os.path.splitext(os.path.basename(img_file))[0])

    xdsinp = os.path.join(work_dir, "XDS.INP")
    spot_xds = os.path.join(work_dir, "SPOT.XDS")

    if not os.path.exists(work_dir):
        os.mkdir(work_dir)

    template_str, min_frame, max_frame = dataset.group_img_files_template([img_file])[0]

    im = XIO.Image(img_file)

    # Remove lines if None (means to use default)
    params_maps = [("strong_pixel", "STRONG_PIXEL="),
                   ("minimum_number_of_pixels_in_a_spot", "MINIMUM_NUMBER_OF_PIXELS_IN_A_SPOT="),
                   ("background_pixel", "BACKGROUND_PIXEL="),
                   ("maximum_number_of_strong_pixels", "MAXIMUM_NUMBER_OF_STRONG_PIXELS="),
                   ("spot_maximum_centroid", "SPOT_MAXIMUM-CENTROID="),
                   ("reflecting_range", "REFLECTING_RANGE="),
                   ("value_range_for_trusted_detector_pixels", "VALUE_RANGE_FOR_TRUSTED_DETECTOR_PIXELS="),
                   ]
    tmp = xds_inp_template.splitlines()
    for p, x in params_maps:
        if getattr(params.xds, p) is None:
            tmp = filter(lambda s: not s.startswith(x), tmp)

    inp_template = "\n".join(tmp)

    # Prepare XDS.INP
    inp_str = inp_template % dict(template=os.path.relpath(template_str, work_dir),
                                  framenum=min_frame,
                                  orgx=im.header["BeamX"]/im.header["PixelX"],
                                  orgy=im.header["BeamY"]/im.header["PixelY"],
                                  distance=im.header["Distance"],
                                  osc_range=im.header["PhiWidth"],
                                  wavelength=im.header["Wavelength"],
                                  strong_pixel=params.xds.strong_pixel,
                                  min_pixels=params.xds.minimum_number_of_pixels_in_a_spot,
                                  background_pixel=params.xds.background_pixel,
                                  max_strong_pixels=params.xds.maximum_number_of_strong_pixels,
                                  spot_maximum_centroid=params.xds.spot_maximum_centroid,
                                  reflecting_range=params.xds.reflecting_range,
                                  nx=im.header["Width"], ny=im.header["Height"],
                                  qx=im.header["PixelX"], qy=im.header["PixelY"],
                                  defpix_trusted1=params.xds.value_range_for_trusted_detector_pixels[0],
                                  defpix_trusted2=params.xds.value_range_for_trusted_detector_pixels[1])

    open(xdsinp, "w").write(inp_str)

    if params.xds.do_defpix:
        xp = xparm.XPARM()
        xp.set_info_from_xdsinp(xdsinp)
        open(os.path.join(work_dir, "XPARM.XDS"), "w").write(xp.xparm_str())
        modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT DEFPIX"),
                                          ("INCLUDE_RESOLUTION_RANGE", res_range_for_xds(params.distl.res.outer, params.distl.res.inner))
                                          ])
        call("xds", wdir=work_dir, stdout=open(os.path.join(work_dir, "xds.log"), "w"))
        shutil.copy(os.path.join(work_dir, "BKGPIX.cbf"), os.path.join(work_dir, "BKGINIT.cbf"))
        modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")])
    else:
        modify_xdsinp(xdsinp, inp_params=[("TRUSTED_REGION", res_range_to_trusted_region(params.distl.res.outer, params.distl.res.inner, im.header))
                                          ])
        open(os.path.join(work_dir, "xds.log"), "w").write("")

    # Run XDS
    rotate_file(spot_xds)
    call("xds", wdir=work_dir, stdout=open(os.path.join(work_dir, "xds.log"), "a"))

    # Extract results
    spots = []  # (x, y, d, intensity)
    if os.path.isfile(spot_xds):
        for l in open(spot_xds):
            x, y, z, intensity = map(lambda x: float(x), l.strip().split())
            d = coord_to_resol(x, y, im.header)
            spots.append((x, y, d, intensity))

    # Delete dir
    shutil.rmtree(work_dir)

    return spots
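
# coord_to_resol() is not defined in this file. The standard conversion from a
# detector pixel to a d-spacing uses Bragg's law: with r the radial distance
# from the beam centre in mm, theta = atan(r/distance)/2 and
# d = wavelength/(2 sin theta). A sketch under those assumptions, using the
# same XIO header keys as above (BeamX/BeamY in mm, PixelX/PixelY in mm/px):

import math

def coord_to_resol_sketch(x, y, header):
    rx = (x - header["BeamX"]/header["PixelX"]) * header["PixelX"]  # mm from beam centre
    ry = (y - header["BeamY"]/header["PixelY"]) * header["PixelY"]  # mm from beam centre
    r = math.sqrt(rx*rx + ry*ry)
    theta = 0.5 * math.atan2(r, header["Distance"])
    if theta <= 0:
        return float("inf")  # on-axis pixel: infinite d-spacing
    return header["Wavelength"] / (2. * math.sin(theta))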
def run(params, xac_files):
    if len(xac_files) == 0:
        print "No XDS_ASCII.HKL files provided."
        return

    # Parse
    dmin_dict = {}
    if params.dmin_lst:
        for l in open(params.dmin_lst):
            sp = l.split()
            if len(sp) != 2: continue
            f, dmin = sp
            dmin_dict[f] = dmin

    xscale_inp_head = "!SNRC= 3 ! was MINIMUM_I/SIGMA= before BUILT=20191015\n\n"
    if params.wfac1 is not None:
        xscale_inp_head += "WFAC1= %.3f\n" % params.wfac1

    if params.nproc:
        xscale_inp_head += "MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % params.nproc

    infos = {}
    d_max, d_min = 0, 100
    cells = []
    for xds_ascii in xac_files:
        info = get_xac_info(xds_ascii, get_nframes=params.frames_per_batch is not None)
        if xds_ascii in dmin_dict:
            dmax, dmin = info["resol_range"].split()
            info["resol_range_user"] = "%s %s" % (dmax, dmin_dict[xds_ascii])

        infos[xds_ascii] = info
        resrng = map(float, info["resol_range"].split())
        d_max = max(d_max, resrng[0])
        d_min = min(d_min, resrng[1])
        cells.append(map(float, info["cell"].split()))

    if params.d_min is not None:
        d_min = max(params.d_min, d_min)

    if params.cell == "average":
        cell_sum = reduce(lambda x, y: map(lambda a: a[0]+a[1], zip(x, y)), cells)
        cell_mean = map(lambda x: x/float(len(cells)), cell_sum)

        if params.sgnum is not None:
            sgnum = str(params.sgnum)
        else:
            sgnum = infos[xac_files[0]]["spgr_num"]

        xscale_inp_head += " SPACE_GROUP_NUMBER= %s\n" % sgnum
        xscale_inp_head += " UNIT_CELL_CONSTANTS= %s\n" % " ".join(map(lambda x: "%.3f" % x, cell_mean))

    xscale_inp_head += make_shells(d_max, d_min, params.nbins) + "\n"

    xscale_inp_head += " OUTPUT_FILE= %s\n" % params.output
    xscale_inp_head += " FRIEDEL'S_LAW= %s\n\n" % ("FALSE" if params.anomalous else "TRUE")

    prep_xscale_inp(params.workdir, xscale_inp_head, xac_files, infos,
                    params.frames_per_batch, params.corrections)
    xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                      cbf_to_dat=params.cbf_to_dat,
                      use_tmpdir_if_available=params.use_tmpdir_if_available)

    if params.reference:
        print "Choosing reference data (reference=%s)" % params.reference
        ref_idx = xscale.decide_scaling_reference_based_on_bfactor(os.path.join(params.workdir, "XSCALE.LP"),
                                                                   params.reference, return_as="index")
        if ref_idx != 0:
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(params.workdir, f))
            prep_xscale_inp(params.workdir, xscale_inp_head, xac_files, infos,
                            params.frames_per_batch, params.corrections, ref_idx=ref_idx)
            xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                              cbf_to_dat=params.cbf_to_dat,
                              use_tmpdir_if_available=params.use_tmpdir_if_available)
def find_spots(img_file, params):
    """
    Use XDS to locate spots. If params.xds.do_defpix is true, DEFPIX will be run.
    For DEFPIX, a dummy XPARM.XDS is needed. Thanks to DEFPIX, we can mask the beam
    stop shadow and remove areas outside the resolution range. If false, we need to
    set TRUSTED_REGION to exclude regions outside the resolution range, but it is
    independent of beam center. Maybe we should remove spots outside the resolution
    range after the XDS run?
    """

    # Test if ramdisk available
    if os.path.isdir("/dev/shm"):
        work_dir = tempfile.mkdtemp(prefix="shika_x_"+os.path.splitext(os.path.basename(img_file))[0], dir="/dev/shm")
    else:
        work_dir = os.path.join(params.work_dir, "xds_"+os.path.splitext(os.path.basename(img_file))[0])

    xdsinp = os.path.join(work_dir, "XDS.INP")
    spot_xds = os.path.join(work_dir, "SPOT.XDS")

    if not os.path.exists(work_dir): os.mkdir(work_dir)

    template_str, min_frame, max_frame = dataset.group_img_files_template([img_file])[0]
    im = XIO.Image(img_file)

    # Remove lines if None (means to use default)
    params_maps = [("strong_pixel", "STRONG_PIXEL="),
                   ("minimum_number_of_pixels_in_a_spot", "MINIMUM_NUMBER_OF_PIXELS_IN_A_SPOT="),
                   ("background_pixel", "BACKGROUND_PIXEL="),
                   ("maximum_number_of_strong_pixels", "MAXIMUM_NUMBER_OF_STRONG_PIXELS="),
                   ("spot_maximum_centroid", "SPOT_MAXIMUM-CENTROID="),
                   ("reflecting_range", "REFLECTING_RANGE="),
                   ("value_range_for_trusted_detector_pixels", "VALUE_RANGE_FOR_TRUSTED_DETECTOR_PIXELS="),
                   ]
    tmp = xds_inp_template.splitlines()
    for p, x in params_maps:
        if getattr(params.xds, p) is None:
            tmp = filter(lambda s: not s.startswith(x), tmp)
    inp_template = "\n".join(tmp)

    # Prepare XDS.INP
    inp_str = inp_template % dict(template=os.path.relpath(template_str, work_dir),
                                  framenum=min_frame,
                                  orgx=im.header["BeamX"]/im.header["PixelX"],
                                  orgy=im.header["BeamY"]/im.header["PixelY"],
                                  distance=im.header["Distance"],
                                  osc_range=im.header["PhiWidth"],
                                  wavelength=im.header["Wavelength"],
                                  strong_pixel=params.xds.strong_pixel,
                                  min_pixels=params.xds.minimum_number_of_pixels_in_a_spot,
                                  background_pixel=params.xds.background_pixel,
                                  max_strong_pixels=params.xds.maximum_number_of_strong_pixels,
                                  spot_maximum_centroid=params.xds.spot_maximum_centroid,
                                  reflecting_range=params.xds.reflecting_range,
                                  nx=im.header["Width"], ny=im.header["Height"],
                                  qx=im.header["PixelX"], qy=im.header["PixelY"],
                                  defpix_trusted1=params.xds.value_range_for_trusted_detector_pixels[0],
                                  defpix_trusted2=params.xds.value_range_for_trusted_detector_pixels[1])

    open(xdsinp, "w").write(inp_str)

    if params.xds.do_defpix:
        xp = xparm.XPARM()
        xp.set_info_from_xdsinp(xdsinp)
        open(os.path.join(work_dir, "XPARM.XDS"), "w").write(xp.xparm_str())
        modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT DEFPIX"),
                                          ("INCLUDE_RESOLUTION_RANGE", res_range_for_xds(params.distl.res.outer, params.distl.res.inner)),
                                          ])
        call("xds", wdir=work_dir, stdout=open(os.path.join(work_dir, "xds.log"), "w"))
        shutil.copy(os.path.join(work_dir, "BKGPIX.cbf"), os.path.join(work_dir, "BKGINIT.cbf"))
        modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")])
    else:
        modify_xdsinp(xdsinp, inp_params=[("TRUSTED_REGION", res_range_to_trusted_region(params.distl.res.outer, params.distl.res.inner, im.header)),
                                          ])
        open(os.path.join(work_dir, "xds.log"), "w").write("")

    # Run XDS
    rotate_file(spot_xds)
    call("xds", wdir=work_dir, stdout=open(os.path.join(work_dir, "xds.log"), "a"))

    # Extract results
    spots = []  # (x, y, d, intensity)
    if os.path.isfile(spot_xds):
        for l in open(spot_xds):
            x, y, z, intensity = map(lambda v: float(v), l.strip().split())
            d = coord_to_resol(x, y, im.header)
            spots.append((x, y, d, intensity))

    # Delete dir
    shutil.rmtree(work_dir)

    return spots
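# Standalone illustration of the ORGX/ORGY computation used above: XDS wants the
# direct-beam position in pixels, while the image header stores it in mm, so the
# mm value is divided by the pixel size. The helper name and the header values
# below are hypothetical, not taken from any real dataset.
def beam_center_px(beam_mm, pixel_mm):
    # e.g. ORGX = BeamX [mm] / PixelX [mm/px]
    return beam_mm / pixel_mm

if __name__ == "__main__":
    print(beam_center_px(155.0, 0.172))  # BeamX=155 mm, PixelX=0.172 mm -> ~901 px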
def run_cycle(self, xds_ascii_files, reference_idx=None):
    if len(xds_ascii_files) == 0:
        print >>self.out, "Error: no files given."
        return

    xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
    xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

    # Get averaged cell for scaling
    sg, cell = self.average_cells(xds_ascii_files)

    # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
    inp_out = open(xscale_inp, "w")
    inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
    inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
    inp_out.write(self.xscale_inp_head)

    for i, xds_ascii in enumerate(xds_ascii_files):
        f = self.altfile.get(xds_ascii, xds_ascii)
        tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x: len(x))  # use whichever path is shorter
        refstr = "*" if i == reference_idx else " "
        inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
        if len(self.xscale_params.corrections) != 3:
            inp_out.write(" CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
        if self.xscale_params.frames_per_batch is not None:
            frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
            nframes = frame_range[1] - frame_range[0]
            # float() guards against Python 2 integer division before the ceil
            nbatch = int(numpy.ceil(float(nframes) / self.xscale_params.frames_per_batch))
            print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
            inp_out.write(" NBATCH= %d\n" % nbatch)

    inp_out.close()

    print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
    xscale.run_xscale(xscale_inp)
    #util.call(xscale_comm, wdir=self.workdir)

    cbfouts = glob.glob(os.path.join(self.workdir, "*.cbf"))
    if len(cbfouts) > 0:
        # This doesn't affect anything, so I don't want the program to stop if this fails
        try:
            xscalelp.cbf_to_dat(xscale_lp)
            for f in cbfouts: os.remove(f)
        except:
            print >>self.out, traceback.format_exc()

    xscale_log = open(xscale_lp).read()
    if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
        print >>self.out, "DEBUG:: Need to choose files."

        # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections,
        # ..but does not print the table. Sometimes only one dataset is left. Should we make the table ourselves?
        # Older versions just print the correlation table and stop.
        if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
            G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
            #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
            cliques = [c for c in nx.find_cliques(G)]
            cliques.sort(key=lambda x: len(x))
            if self._counter == 1:
                max_clique = cliques[-1]
            else:
                idx_prevfile = 1 if self.reference_file else 0
                max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1]  # xscale.hkl must be included!

            if self.reference_file:
                max_clique = [0,] + filter(lambda x: x != 0, max_clique)

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            try_later = map(lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes()))

            print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique), len(try_later))
            print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files) - len(G.nodes()))
            for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

            assert len(try_later) <= 0  # Never the case with newer xscale!! (if it were, check_remove_list() should be modified to skip_num+=1)
            if len(try_later) > 0:
                print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                if self.reference_file: next_files = [self.reference_file,] + next_files
                self.workdir = self.request_next_workdir()
                self.run_cycle(next_files)
            return
        else:
            bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
            print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))

            # XXX Actually, not all datasets need to be thrown away.. some of them are useful..
            for i in bad_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

            self.run_cycle(map(lambda i: xds_ascii_files[i],
                               filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))
        return
    elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
        print >>self.out, "DEBUG:: Need to discard useless data."
        unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]]  #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
        if len(unuseful_data) == 0:
            print >>self.out, "I don't know how to fix it.."
            return
        remove_idxes = map(lambda x: x[0]-1, unuseful_data)
        remove_idxes = self.check_remove_list(remove_idxes)
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        for i in remove_idxes:
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = "useless"

        for f in "XSCALE.INP", "XSCALE.LP":
            util.rotate_file(os.path.join(self.workdir, f))
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        return
    elif "INACCURATE SCALING FACTORS." in xscale_log:
        # Actually I don't know how to fix this.. (bug?) but it is worth proceeding
        # (discarding bad data may solve the problem).
        print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
    elif "!!! ERROR !!!" in xscale_log:
        print >>self.out, "Unknown error! Please check XSCALE.LP and fix the program."
        return

    # Re-scale by changing reference
    rescale_for = None
    if len(self.reject_method) == 0:
        rescale_for = self.reference_choice  # may be None
    elif reference_idx is None:
        rescale_for = "bmed"

    if rescale_for is not None and len(xds_ascii_files) > 1:
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling with %s" % rescale_for
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(xds_ascii_files, reference_idx=ref_num)

    if len(self.reject_method) == 0:
        return

    # Remove bad data
    remove_idxes = []
    remove_reasons = {}

    if self.reject_method[0] == "framecc":
        print >>self.out, "Rejections based on frame CC"
        from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

        # list of [frame, n_all, n_common, cc] in the same order
        framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                               output_dir=self.workdir,
                                               nproc=self.nproc).values()
        if self.reject_params.framecc.method == "tukey":
            ccs = numpy.array(map(lambda x: x[3], reduce(lambda x, y: x+y, framecc)))
            q25, q75 = numpy.percentile(ccs, [25, 75])
            cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
            print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
        else:
            cc_cutoff = self.reject_params.framecc.abs_cutoff
            print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

        for i, cclist in enumerate(framecc):
            useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
            if len(useframes) == 0:
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            f = xds_ascii_files[i]
            xac = XDS_ASCII(f)
            if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                continue  # All useful frames.

            sel = xac.iframe == useframes[0]
            for x in useframes[1:]: sel |= xac.iframe == x
            if sum(sel) < 10:  # XXX care I/sigma
                remove_idxes.append(i)
                remove_reasons.setdefault(i, []).append("allbadframe")
                continue

            print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str, useframes)),
                                                                           min(xac.iframe), max(xac.iframe), f)

            newf = self.request_file_modify(f)
            xac.write_selected(sel, newf)

        self.reject_method.pop(0)  # Perform only once
    elif self.reject_method[0] == "lpstats":
        if "bfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
            Bs = numpy.array(map(lambda x: x[1], xscalelp.get_k_b(xscale_lp)))
            q25, q75 = numpy.percentile(Bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(Bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_B")
                    count += 1

            print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.b" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
            bs = numpy.array(map(lambda x: x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(bs, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, b in enumerate(bs):
                if b < lowlim or b > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.b")
                    count += 1

            print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "em.ab" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
            vals = numpy.array(map(lambda x: x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, ab in enumerate(vals):
                if ab < lowlim or ab > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_em.ab")
                    count += 1

            print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "rfactor" in self.reject_params.lpstats.stats:
            iqrc = self.reject_params.lpstats.iqr_coeff
            print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
            rstats = xscalelp.get_rfactors_for_each(xscale_lp)
            vals = numpy.array(map(lambda x: rstats[x][-1][1], rstats))  # Read total R-factor
            q25, q75 = numpy.percentile(vals, [25, 75])
            iqr = q75 - q25
            lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
            count = 0
            for i, v in enumerate(vals):
                if v < lowlim or v > highlim:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("bad_R")
                    count += 1

            print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed" % (count, lowlim, highlim)

        if "pairwise_cc" in self.reject_params.lpstats.stats:
            corrs = xscalelp.get_pairwise_correlations(xscale_lp)
            if self.reject_params.lpstats.pwcc.method == "tukey":
                q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                iqr = q75 - q25
                lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
            else:
                lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

            bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
            idx_bad = {}
            for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                idx_bad[i] = idx_bad.get(i, 0) + 1
                idx_bad[j] = idx_bad.get(j, 0) + 1

            idx_bad = idx_bad.items()
            idx_bad.sort(key=lambda x: x[1])
            count = 0
            for idx, badcount in reversed(idx_bad):
                remove_idxes.append(idx-1)
                remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                if len(bad_corrs) == 0: break
                fun_key = lambda x: x[3]
                print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx,
                                                                                               min(bad_corrs, key=fun_key)[3],
                                                                                               max(bad_corrs, key=fun_key)[3],
                                                                                               len(bad_corrs))
                count += 1

            print >>self.out, " %4d pairwise CC outliers removed" % count

        self.reject_method.pop(0)  # Perform only once
    elif self.reject_method[0] == "delta_cc1/2":
        print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = xscalelp.read_stats_table(xscale_lp)
        i_stat = -1 if self.delta_cchalf_bin == "total" else -2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name->idx table
        remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

        # For consistent resolution limit
        inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
        count = 0
        for i in xrange(len(xds_ascii_files)-1):  # if only one file, cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

            cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                         inpfiles=remaining_files.keys(),
                                                         stat_bin=self.delta_cchalf_bin,
                                                         nproc=self.nproc,
                                                         nproc_each=self.nproc_each,
                                                         batchjobs=self.batchjobs)

            rem_idx, cc_i, nuniq_i = cchalf_list[0]  # First (largest) is the worst one to remove.
            rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]

            # Decision making by CC1/2
            print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org,
                                                                                      cc_i, nuniq_i,
                                                                                      prev_cchalf, prev_nuniq)
            if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
            print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
            del remaining_files[remaining_files.keys()[rem_idx]]  # remove file from table
            count += 1

        print >>self.out, " %4d removed by DeltaCC1/2 method" % count

        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
        else:
            self.reject_method.pop(0)
    else:
        print >>self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method[0]

    # Remove duplicates
    remove_idxes = list(set(remove_idxes))
    remove_idxes = self.check_remove_list(remove_idxes)
    if len(remove_idxes) > 0:
        print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
        for i in sorted(remove_idxes):
            print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
            self.removed_files.append(xds_ascii_files[i])
            self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

    # Next run
    keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
    if len(self.reject_method) > 0 or len(remove_idxes) > 0:
        self.workdir = self.request_next_workdir()
        self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
    elif self.reference_choice is not None and len(keep_idxes) > 1:
        # Just re-scale with B reference
        ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
        if reference_idx != ref_num:
            print >>self.out, "Rescaling2 with %s" % self.reference_choice
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
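# Self-contained sketch of the Tukey-style outlier rule applied repeatedly above
# (frameCC, B-factor, error-model and R-factor rejections): values outside
# [Q1 - c*IQR, Q3 + c*IQR] are flagged for removal. The function name and the
# data below are made-up examples, not output of any real run.
import numpy

def tukey_outlier_indices(values, iqr_coeff=1.5):
    q25, q75 = numpy.percentile(values, [25, 75])
    iqr = q75 - q25
    lowlim, highlim = q25 - iqr_coeff*iqr, q75 + iqr_coeff*iqr
    return [i for i, v in enumerate(values) if v < lowlim or v > highlim]

if __name__ == "__main__":
    # B-factors with one obvious outlier at index 3
    print(tukey_outlier_indices([2.1, 2.3, 2.2, 9.8, 2.0]))  # -> [3]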
def xds_sequence(root, params):
    print
    print os.path.relpath(root, params.topdir)

    xparm = os.path.join(root, "XPARM.XDS")
    gxparm = os.path.join(root, "GXPARM.XDS")
    defpix_lp = os.path.join(root, "DEFPIX.LP")
    correct_lp = os.path.join(root, "CORRECT.LP")
    integrate_hkl = os.path.join(root, "INTEGRATE.HKL")
    xac_hkl = os.path.join(root, "XDS_ASCII.HKL")
    integrate_lp = os.path.join(root, "INTEGRATE.LP")
    xdsinp = os.path.join(root, "XDS.INP")

    assert os.path.isfile(xdsinp)

    decilog = multi_out()
    decilog.register("log", open(os.path.join(root, "decision.log"), "a"), atexit_send_to=None)
    print >>decilog, "xds_sequence started at %s in %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), root)

    if params.show_progress:
        decilog.register("stdout", sys.stdout)

    if params.mode == "initial" and params.resume and os.path.isfile(correct_lp):
        print " Already processed."
        return

    if params.mode == "recycle" and not os.path.isfile(gxparm):
        print "GXPARM.XDS not found. Cannot do recycle."
        return

    if params.fast_delphi and (params.nproc is None or params.nproc > 1):
        delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc)
        print " Setting delphi to ", delphi
        modify_xdsinp(xdsinp, inp_params=[("DELPHI", str(delphi)),
                                          ])

    if params.nproc is not None and params.nproc > 1:
        modify_xdsinp(xdsinp, inp_params=[("MAXIMUM_NUMBER_OF_PROCESSORS", str(params.nproc)),
                                          ])

    if params.mode == "initial":
        # To Indexing
        modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT COLSPOT IDXREF")])
        run_xds(wdir=root, show_progress=params.show_progress)
        print  # indexing stats like indexed percentage here.

        if params.tryhard:
            try_indexing_hard(root, params.show_progress, decilog,
                              known_sgnum=params.cell_prior.sgnum,
                              known_cell=params.cell_prior.cell,
                              tol_length=params.cell_prior.tol_length,
                              tol_angle=params.cell_prior.tol_angle)

        if not os.path.isfile(xparm):
            print >>decilog, " Indexing failed."
            return

        if params.cell_prior.check and params.cell_prior.sgnum > 0:
            xsxds = XPARM(xparm).crystal_symmetry()
            xsref = crystal.symmetry(params.cell_prior.cell, params.cell_prior.sgnum)
            cosets = reindex.reindexing_operators(xsref, xsxds,
                                                  params.cell_prior.tol_length, params.cell_prior.tol_angle)
            if cosets.double_cosets is None:
                print >>decilog, " Incompatible cell. Indexing failed."
                return

    elif params.mode == "recycle":
        print " Start recycle. original ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
        for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS", "plot_integrate.log"):
            util.rotate_file(os.path.join(root, f), copy=True)
        shutil.copyfile(gxparm+".1", xparm)
    else:
        raise Exception("Unknown mode (%s)" % params.mode)

    # To Integration
    modify_xdsinp(xdsinp, inp_params=[("JOB", "DEFPIX INTEGRATE"),
                                      ("INCLUDE_RESOLUTION_RANGE", "50 0")])
    run_xds(wdir=root, show_progress=params.show_progress)
    if os.path.isfile(integrate_lp):
        xds_plot_integrate.run(integrate_lp, os.path.join(root, "plot_integrate.log"))
    if not os.path.isfile(integrate_hkl):
        print >>decilog, " Integration failed."
        return

    # Make _noscale.HKL if needed
    if params.no_scaling:
        bk_prefix = make_backup(("XDS.INP",), wdir=root, quiet=True)
        xparm_obj = XPARM(xparm)
        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                          ("CORRECTIONS", ""),
                                          ("NBATCH", "1"),
                                          ("MINIMUM_I/SIGMA", "50"),
                                          ("REFINE(CORRECT)", ""),
                                          ("UNIT_CELL_CONSTANTS", " ".join(map(lambda x: "%.3f" % x, xparm_obj.unit_cell))),
                                          ("SPACE_GROUP_NUMBER", "%d" % xparm_obj.spacegroup),])
        print >>decilog, " running CORRECT without empirical scaling"
        run_xds(wdir=root, show_progress=params.show_progress)
        for f in xds_files.generated_by_CORRECT + ("XDS.INP",):
            ff = os.path.join(root, f)
            if not os.path.isfile(ff): continue
            if ff.endswith(".cbf"):
                os.remove(ff)
            else:
                os.rename(ff, ff+"_noscale")

        revert_files(("XDS.INP",), bk_prefix, wdir=root, quiet=True)

    # Run pointless
    symm_by_integrate = None
    if params.use_pointless:
        worker = Pointless()
        result = worker.run_for_symm(xdsin=integrate_hkl,
                                     logout=os.path.join(root, "pointless_integrate.log"))
        if "symm" in result:
            symm = result["symm"]
            print >>decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info()
            sgnum = symm.space_group_info().type().number()
            cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
            modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                              ("UNIT_CELL_CONSTANTS", cell)])
            symm_by_integrate = symm
        else:
            print >>decilog, " pointless failed."

    # Do Scaling
    modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),])

    run_xds(wdir=root, show_progress=params.show_progress)

    if not os.path.isfile(gxparm):
        print >>decilog, " Scaling failed."
        return

    print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)

    ret = calc_merging_stats(os.path.join(root, "XDS_ASCII.HKL"))
    if params.cut_resolution:
        if ret is not None and ret[0] is not None:
            d_min = ret[0]
            modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                              ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
            print >>decilog, " Re-scale at %.2f A" % d_min
            os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP"))
            os.rename(os.path.join(root, "XDS_ASCII.HKL"), os.path.join(root, "XDS_ASCII_fullres.HKL"))
            run_xds(wdir=root, show_progress=params.show_progress)
            print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
            print >>decilog, " (Original files are saved as *_fullres.*)"
        else:
            print >>decilog, "error: Can't decide resolution."

    last_ISa = correctlp.get_ISa(correct_lp, check_valid=True)

    # Run pointless and (if result is different from INTEGRATE) re-scale.
    if params.use_pointless:
        worker = Pointless()
        result = worker.run_for_symm(xdsin=xac_hkl,
                                     logout=os.path.join(root, "pointless_correct.log"))
        if "symm" in result:
            symm = result["symm"]
            need_rescale = False

            if symm_by_integrate is not None:
                if not xtal.is_same_laue_symmetry(symm_by_integrate.space_group(), symm.space_group()):
                    print >>decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % (symm.space_group_info(), symm_by_integrate.space_group_info())
                    need_rescale = True
            else:
                print >>decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info()
                need_rescale = True

            if need_rescale:
                # Make a backup, run CORRECT again, and compare ISa.
                # If ISa got worse, revert the result.
                backup_needed = ("XDS.INP", "XDS_ASCII_fullres.HKL", "CORRECT_fullres.LP",
                                 "merging_stats.pkl", "merging_stats.log")
                backup_needed += xds_files.generated_by_CORRECT
                bk_prefix = make_backup(backup_needed, wdir=root, quiet=True)

                sgnum = symm.space_group_info().type().number()
                cell = " ".join(map(lambda x: "%.2f" % x, symm.unit_cell().parameters()))
                modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                  ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                                  ("UNIT_CELL_CONSTANTS", cell),
                                                  ("INCLUDE_RESOLUTION_RANGE", "50 0")])

                run_xds(wdir=root, show_progress=params.show_progress)

                ret = calc_merging_stats(os.path.join(root, "XDS_ASCII.HKL"))

                if params.cut_resolution:
                    if ret is not None and ret[0] is not None:
                        d_min = ret[0]
                        modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"),
                                                          ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)])
                        print >>decilog, " Re-scale at %.2f A" % d_min
                        os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP"))
                        os.rename(os.path.join(root, "XDS_ASCII.HKL"), os.path.join(root, "XDS_ASCII_fullres.HKL"))
                        run_xds(wdir=root, show_progress=params.show_progress)
                        print >>decilog, " OK. ISa= %.2f" % correctlp.get_ISa(correct_lp, check_valid=True)
                        print >>decilog, " (Original files are saved as *_fullres.*)"
                    else:
                        print >>decilog, "error: Can't decide resolution."
                        for f in ("CORRECT_fullres.LP", "XDS_ASCII_fullres.HKL"):
                            if os.path.isfile(os.path.join(root, f)):
                                print >>decilog, "removing", f
                                os.remove(os.path.join(root, f))

                ISa = correctlp.get_ISa(correct_lp, check_valid=True)

                if ISa >= last_ISa or last_ISa != last_ISa:  # if improved or last_ISa is nan
                    print >>decilog, "ISa improved= %.2f" % ISa
                    remove_backups(backup_needed, bk_prefix, wdir=root)
                else:
                    print >>decilog, "ISa got worse= %.2f" % ISa
                    for f in backup_needed:
                        if os.path.isfile(os.path.join(root, f)):
                            os.remove(os.path.join(root, f))
                    revert_files(backup_needed, bk_prefix, wdir=root, quiet=True)

    run_xdsstat(wdir=root)
    print
    if params.make_report:
        html_report.make_individual_report(root, root)

    print >>decilog, "xds_sequence finished at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")
    decilog.close()
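# Minimal sketch of the accept/revert rule used above after re-scaling in a new
# symmetry: keep the new result if ISa improved, or if the previous ISa was NaN
# (the "last_ISa != last_ISa" test, which is true only for NaN). The helper name
# and the values are hypothetical.
def keep_new_result(new_isa, last_isa):
    return new_isa >= last_isa or last_isa != last_isa  # NaN-safe comparison

if __name__ == "__main__":
    print(keep_new_result(25.0, 20.0))           # True: improved, discard backups
    print(keep_new_result(18.0, float("nan")))   # True: no valid previous ISa
    print(keep_new_result(18.0, 20.0))           # False: got worse, revert backups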
def prepare_plot(plot_data, f, kind, wdir, rotate=False, plot_grid=True):
    def normalize_max(v, maximum=400.):
        max_v = max(v)
        f = maximum / max_v if max_v > 0 else 1.
        return map(lambda x: f*x + 1., v)  # add 1 to make zero-value pickable # XXX when max_v is Inf?
    # normalize_max()

    scan_prefix = f[:f.index(" ")] if " (phi=" in f else f
    pngout = os.path.join(wdir, "plot_%s%s.png" % (scan_prefix, kind))
    if rotate:
        rotate_file(pngout)

    xs, ys, ds, imgfs = [], [], [], []
    zero_xs, zero_ys = [], []  # For values of zero
    for imgf, stat in plot_data[f]:
        gc = stat.grid_coord
        if gc is None:
            continue
        x, y = gc
        x *= 1000.
        y *= 1000.
        d = stat.stats[("n_spots", "total_integrated_signal", "median_integrated_signal").index(kind)]
        xs.append(x)
        ys.append(y)
        ds.append(d)
        imgfs.append(imgf)

        if d == 0:
            zero_xs.append(x)
            zero_ys.append(y)

    if len(xs) == 0:
        return "", ""

    win = (max(xs)-min(xs)+1000)/1000*400/80*1.7  # ad-hoc scale
    hin = (max(ys)-min(ys)+1000)/1000*400/80

    fig = matplotlib.figure.Figure(figsize=(win, hin), dpi=80)  # figsize in inches
    ax = fig.add_subplot(111)
    #p = ax.scatter(xs, ys, s=normalize_max(ds), c=ds, alpha=0.5)  # s in points^2

    scaninfo = plot_data[f][0][1].scan_info

    if plot_grid:
        p = plot_heatmap(ax, xs, ys, ds, scaninfo)
    else:
        p, _ = plot_circles(ax, xs, ys, ds, zero_xs, zero_ys)

    if max(ds) - min(ds) > 1e-5:
        fig.colorbar(p)
    ax.scatter(zero_xs, zero_ys, s=50, marker="x", c=[0]*len(zero_xs), alpha=0.5)
    ax.set_xlabel("horizontal [um]")
    ax.set_ylabel("vertical [um]")

    if scaninfo is not None:
        vp, hp = scaninfo.vpoints, scaninfo.hpoints
        vs, hs = scaninfo.vstep*1000., scaninfo.hstep*1000.

        if 1 in (vp, hp) or len(plot_data[f]) <= hp:
            ax.set_aspect("auto")
        else:
            ax.set_aspect("equal")

        if vp == hp == 1:
            ax.set_xlim(-10, 10)
            ax.set_ylim(-10, 10)
        elif vp == 1:
            ax.set_xlim(min(xs)-hs, max(xs)+hs)
            ax.set_ylim(-10, 10)
        elif hp == 1:
            ax.set_xlim(-10, 10)
            ax.set_ylim(min(ys)-vs, max(ys)+vs)
        else:
            ax.set_xlim(min(xs)-hs, max(xs)+hs)
            ax.set_ylim(min(ys)-vs, max(ys)+vs)
    else:
        # Should never reach here.. but should we set limits here?
        vs, hs = 5, 5

    canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
    canvas.print_figure(pngout+".tmp", dpi=80, format="png")
    img_width = fig.get_figwidth() * 80
    img_height = fig.get_figheight() * 80

    map_str = '<map name="%smap">\n' % scan_prefix
    for x, y, imgf in zip(xs, ys, imgfs):
        if plot_grid:
            tx1, ty1 = ax.transData.transform((x-hs/2., y-vs/2.))
            tx2, ty2 = ax.transData.transform((x+hs/2., y+vs/2.))
            map_str += '  <area shape="rect" coords="%.2f,%.2f,%.2f,%.2f" title="%s" onClick=\'plotClick("%s", "%s")\'>\n' % (tx1, img_height-ty1, tx2, img_height-ty2, os.path.basename(imgf), scan_prefix, os.path.basename(imgf))
        else:
            tx, ty = ax.transData.transform((x, y))
            map_str += '  <area shape="circle" coords="%.2f,%.2f,10" title="%s" onClick=\'plotClick("%s", "%s")\'>\n' % (tx, img_height-ty, os.path.basename(imgf), scan_prefix, os.path.basename(imgf))

    map_str += "</map>"
    return pngout, map_str
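# Minimal sketch of the data-to-image-map coordinate conversion used above:
# matplotlib's transData yields pixel coordinates with the origin at the
# bottom-left, while HTML <area> coords count from the top-left, hence the
# "img_height - ty" flip. Figure size, DPI and the sample point are arbitrary.
import matplotlib
matplotlib.use("Agg")
import matplotlib.figure

fig = matplotlib.figure.Figure(figsize=(4, 4), dpi=80)
ax = fig.add_subplot(111)
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
img_height = fig.get_figheight() * 80  # total image height in pixels
tx, ty = ax.transData.transform((25.0, 75.0))
print("html map coords: %.2f,%.2f" % (tx, img_height - ty))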
def make_html_report(current_stats, wdir, htmlout, zoo_mode, rotate=False, plot_grid=True):
    #plot_data = self.plotFrame.data
    shikalog.info("Making HTML report for %s" % wdir)
    startt = time.time()

    plot_data = collections.OrderedDict()
    for f, stat in current_stats.items():
        if stat is None: continue
        fpref = decide_fpref(f, stat.scan_info)
        plot_data.setdefault(fpref, []).append((f, stat))

    #if gui_params.mode == "zoo": htmlout = os.path.join(wdir, "report_zoo.html")
    #else: htmlout = os.path.join(wdir, "report.html")

    if rotate:
        rotate_file(htmlout)

    if zoo_mode:
        assert len(plot_data) <= 1

    kinds = ("total_integrated_signal", "median_integrated_signal", "n_spots")
    plots = ""
    pngs = []
    for f in plot_data:
        scan_prefix = f[:f.index(" ")] if " (phi=" in f else f
        info = plot_data[f][0][1].scan_info

        if info is None:
            info = bl_logfiles.ScanInfo()  # Empty info
        plots += '<table border=0 style="margin-bottom:0px">\n  <tr><td>\n'

        if zoo_mode:
            try:
                im = Image.open(os.path.join(wdir, "../../../before.ppm"))
                im.save(os.path.join(wdir, "loop_before.jpg"))
            except:
                import traceback
                print "Can't convert loop image"
                print traceback.format_exc()
            plots += '  Loop image</td><td><img src="loop_before.jpg" /></td></tr>\n'
            plots += '  <tr><td>\n'

        plots += '  <table class="info"><tr><th>scan</th><td>%s</td></tr>\n' % scan_prefix
        plots += '    <tr><th>date</th><td>%s</td></tr>\n' % (info.date.strftime("%Y/%m/%d %H:%M:%S") if info.date != 0 else "??")

        if info.is_shutterless():
            plots += '    <tr><th>fixed spindle</th><td>%.2f°</td></tr>\n' % info.fixed_spindle
            plots += '    <tr><th>frame rate</th><td>%.2f [Hz]</td></tr>\n' % info.frame_rate
        else:
            plots += '    <tr><th>osc. start</th><td>%.2f°</td></tr>\n' % info.osc_start
            plots += '    <tr><th>osc. step</th><td>%.2f°</td></tr>\n' % info.osc_step
            plots += '    <tr><th>exp. time</th><td>%.2f [sec]</td></tr>\n' % info.exp_time

        plots += '    <tr><th>beam size</th><td>h= %.1f, v= %.1f [μm]</td></tr>\n' % (info.beam_hsize, info.beam_vsize)
        plots += '    <tr><th>attenuator</th><td>%s %.1f [μm]</td></tr>\n' % info.attenuator
        plots += '    <tr><th>distance</th><td>%.2f [mm]</td></tr>\n' % info.distance
        plots += '    <tr><th>wavelength</th><td>%.4f [Å]</td></tr>\n' % info.wavelength
        plots += '    <tr><th>scan points</th><td>v=%d, h=%d</td></tr>\n' % (info.vpoints, info.hpoints)
        plots += '    <tr><th>scan steps</th><td>v=%.2f, h=%.2f [μm]</td></tr>\n' % (info.vstep*1000., info.hstep*1000.)
        plots += '  </table>\n'

        for i, kind in enumerate(kinds):
            pngout, mapstr = prepare_plot(plot_data, f, kind, wdir, rotate, plot_grid)
            pngs.append(pngout)  # rename later
            adds = ""
            if i == 0:
                plots += '  <td><img name="%s" src="%s" usemap="#%smap" /><br />\n' % (scan_prefix, os.path.basename(pngout), scan_prefix)
                plots += '<form>\n'
                adds = ' checked="checked"'
            plots += '<input type="radio" name="spot_mode" value="%s" onClick="changeplot(this, \'%s\')"%s />%s<br />\n' % (kind, scan_prefix, adds, kind)
        plots += '</form>%s</td></tr></table><br>\n\n' % mapstr  # The last mapstr is used. This is dirty way, though.

        plots += '<table border=0 style="margin-bottom:20px">\n  <tr><td>\n'
        plots += '<td style="border:solid 1px #999"><canvas id="%scanvas" width=600 height=600></canvas>\n' % scan_prefix
        plots += '<td id="%sinfo" valign="top"></tr></table>\n\n' % scan_prefix

    result = current_stats.items()
    if len(result) == 0:
        shikalog.warning("No results found. Exiting. %s" % wdir)
        return

    dbfile = os.path.join(wdir, "shika.db")
    con = sqlite3.connect(dbfile, timeout=10, isolation_level=None)
    con.execute('pragma query_only = ON;')
    print "Reading data from DB for making report html."
    c = con.execute("select filename,spots from spots")
    dbspots = dict(map(lambda x: (str(x[0]), pickle.loads(str(x[1]))), c.fetchall()))
    spot_data = "var spot_data = {"
    for i, (f, stat) in enumerate(result):
        if stat is None: continue
        bf = os.path.basename(f)
        spots = dbspots[bf]["spots"]
        thumb_posmag = dbspots[bf]["thumb_posmag"]
        r = re.search("^(.*)_([0-9]+)\.[^0-9]+$", bf)
        prefix, num = r.group(1), int(r.group(2))
        spot_data += '"%s":[[' % bf
        for y, x, snr, d in spots:
            #x, y = spot.max_pxl_y(), spot.max_pxl_x()
            pos = thumb_posmag[0:2]
            mag = thumb_posmag[2]
            x, y = (x-pos[0])*mag, (y-pos[1])*mag
            spot_data += "[%d,%d]," % (x, y)

        spot_data += "], %.1f, %.1f, %d, %d]," % (stat.stats[1], stat.stats[2], stat.stats[0], num)

    spot_data += "};"
    spot_data = spot_data.replace("inf,", "Infinity,").replace("nan,", "NaN,")

    con.close()

    # Determine img picture extension
    img_ext = ".png" if os.path.exists(os.path.join(wdir, os.path.basename(result[0][0])+".png")) else ".jpg"

    jpg_dirs = "var jpg_dirs = {"
    flag_tiled_jpg = False
    if glob.glob(os.path.join(wdir, "thumb_*")):
        for res in result:
            r = re.search("^(.*)_([0-9]+)\.[^0-9]+$", os.path.basename(res[0]))
            prefix, num = r.group(1), int(r.group(2))
            jd = os.path.join("thumb_%s_%.3d" % (prefix, num//1000))
            if not os.path.exists(jd): flag_tiled_jpg = True  # THIS MAY CAUSE A PROBLEM..
            jpg_dirs += '"%s":"%s",' % (os.path.basename(res[0]), jd)
    else:
        for res in result:
            jpg_dirs += '"%s":".",' % os.path.basename(res[0])

    jpg_dirs += "};"

    ofs = open(htmlout, "w")
    ofs.write("""\
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8" />
  <title>SHIKA report</title>
  <script type="text/javascript">
  <!--
    function changeplot(obj, name){
     document.images[name].src = "plot_"+name+obj.value+".png";
    }
    %(spot_data)s
    %(jpg_dirs)s
""" % dict(spot_data=spot_data,
           jpg_dirs=jpg_dirs if not flag_tiled_jpg else ""))

    if flag_tiled_jpg:  # FOR TILED JPEG
        ofs.write("""\
    function plotClick(scanprefix, imgfile) {
        var f = imgfile;
        var data = spot_data[f];
        var img = new Image();
        var idx = Math.floor((data[4]-1)/100);
        var n1 = idx*100+1;
        var n2 = (idx+1)*100;
        img.src = "thumb_" + scanprefix.slice(0,-1) + "/" + scanprefix + ("00000"+n1).slice(-6) + "-" + ("00000"+n2).slice(-6) + ".jpg"; // prefix ends with _
        var idx2 = (data[4]-1)%%100;
        var sx = idx2%%10;
        var sy = Math.floor(idx2/10);

        img.onload = (function(fn){
          return function(){
            var td = document.getElementById(scanprefix+"info");
            td.innerHTML = "<table border=0><tr><td>File name: <td>" + imgfile + "<tr><td>total signal: <td>" + data[1] + "<tr><td>median signal: <td>" + data[2] + "<tr><td>N_spots: <td>" + data[3] + "</table>";

            var t = data[0];
            var canvas = document.getElementById(scanprefix+"canvas");
            var ctx = canvas.getContext('2d');
            ctx.clearRect(0,0,canvas.width,canvas.height);
            ctx.drawImage(this, sx*600, sy*600, 600, 600, 0, 0, 600, 600);
""" % dict(img_ext=img_ext))
    else:  # FOR SINGLE JPEGs
        ofs.write("""\
    function plotClick(scanprefix, imgfile) {
        var f = imgfile;
        var data = spot_data[f];
        var img = new Image();
        img.src = jpg_dirs[f] + "/" + f + "%(img_ext)s";
        img.onload = (function(fn){
          return function(){
            var td = document.getElementById(scanprefix+"info");
            td.innerHTML = "<table border=0><tr><td>File name: <td>" + imgfile + "<tr><td>total signal: <td>" + data[1] + "<tr><td>median signal: <td>" + data[2] + "<tr><td>N_spots: <td>" + data[3] + "</table>";

            var t = data[0];
            var canvas = document.getElementById(scanprefix+"canvas");
            var ctx = canvas.getContext('2d');
            ctx.clearRect(0,0,canvas.width,canvas.height);
            ctx.drawImage(this, 0, 0);
""" % dict(img_ext=img_ext))

    # Common parts
    ofs.write("""\
            for (var i = 0; i < t.length; i++) {
              ctx.rect(t[i][0]-6, t[i][1]-6, 12, 12);
            }
            ctx.strokeStyle = "red";
            ctx.lineWidth = 1;
            ctx.stroke();

            var center = [300,300];
            ctx.beginPath();
            ctx.strokeStyle = "blue";
            ctx.moveTo(center[0]-10, center[1]);
            ctx.lineTo(center[0]+10, center[1]);
            ctx.moveTo(center[0], center[1]-10);
            ctx.lineTo(center[0], center[1]+10);
            ctx.stroke();
          }
        }(f));
    }
  //-->
  </script>
  <style type="text/css">
  <!--
    table.info {
      border-collapse: separate;
      border-spacing: 7px;
    }
    table.info th {
      text-align: left;
    }

    table.images {
      border-collapse: collapse;
      border: solid 1px #999;
    }
    table.images caption {
      margin-top: 1em;
      text-align: left;
    }
    table.images th,
    table.images td {
      border: solid 1px #999;
    }
    table.images th {
      background: #E6E6E6;
      text-align: center;
      white-space: nowrap;
    }
  -->
  </style>
</head>

<body>
<h1>SHIKA report</h1>
<div align="right">
Created on %(date)s<br>
Original directory: %(wdir)s
</div>
<hr style="height: 1px;border: none;border-top: 1px #000000 dotted;" />

%(plots)s

</body>
</html>
""" % dict(plots=plots,
           date=datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S"),
           wdir=wdir,
           ))

    shikalog.debug("Renaming png files in %s" % wdir)
    for png in pngs:
        os.rename(png+".tmp", png)

    delt = time.time() - startt
    shikalog.info("HTML making Done (took %f s). Open? firefox %s" % (delt, htmlout))