def run(params): if os.path.isdir(params.workdir) and os.listdir(params.workdir): print "Directory already exists and not empty:", params.workdir return # Check parameters if params.program == "xscale": if (params.xscale.frames_per_batch, params.xscale.degrees_per_batch).count(None) == 0: print "ERROR! You can't specify both of xscale.frames_per_batch and xscale.degrees_per_batch" return if params.reference_file is not None and params.program != "xscale": print "WARNING - reference file is not used unless program=xscale." if not os.path.isdir(params.workdir): os.makedirs(params.workdir) if params.batch.engine == "sge": batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name) elif params.batch.engine == "sh": batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs) else: raise "Unknown batch engine: %s" % params.batch.engine out = multi_out() out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None) out.register("stdout", sys.stdout) out.write("kamo.multi_merge started at %s\n\n" % time.strftime("%Y-%m-%d %H:%M:%S")) time_started = time.time() print >> out, "Paramters:" libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ") print >> out, "" # XXX Not works when clustering is used.. html_report = multi_merging.html_report.HtmlReportMulti( os.path.abspath(params.workdir)) try: html_report.add_params(params, master_params_str) except: print >> out, traceback.format_exc() xds_ascii_files = util.read_path_list(params.lstin, only_exists=True, as_abspath=True, err_out=out) if not xds_ascii_files: print >> out, "ERROR! Cannot find (existing) files in %s." % params.lstin return if len(xds_ascii_files) < 2: print >> out, "ERROR! Only one file in %s." % params.lstin print >> out, " Give at least two files for merging." return cells = collections.OrderedDict() laues = {} # for check for xac in xds_ascii_files: try: symm = XDS_ASCII(xac, read_data=False).symm except: print >> out, "Error in reading %s" % xac print >> out, traceback.format_exc() return cells[xac] = symm.unit_cell().parameters() laue = symm.space_group().build_derived_reflection_intensity_group( False).info() laues.setdefault(str(laue), {}).setdefault( symm.space_group_info().type().number(), []).append(xac) if len(laues) > 1: print >> out, "ERROR! more than one space group included." for laue in laues: print "Laue symmetry", laue for sg in laues[laue]: print >> out, " SPACE_GROUP_NUMBER= %d (%d data)" % ( sg, len(laues[laue][sg])) for f in laues[laue][sg]: print >> out, " %s" % f print >> out, "" return space_group = None if params.space_group is not None: space_group = sgtbx.space_group_info(params.space_group).group() laue_given = str( space_group.build_derived_reflection_intensity_group(False).info()) if laue_given != laues.keys()[0]: print >> out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % ( params.space_group, laues.keys()[0]) return sg_refset = space_group.info().as_reference_setting().group() if space_group != sg_refset: print >> out, "Sorry! currently space group in non-reference setting is not supported." print >> out, "(You requested %s, which is different from reference setting: %s)" % ( space_group.info(), sg_refset.info()) return else: tmp = sgtbx.space_group_info( laues.values()[0].keys() [0]).group().build_derived_reflection_intensity_group(True) print >> out, "Space group for merging:", tmp.info() test_flag_will_be_transferred = False if params.reference.data is not None: params.reference.data = os.path.abspath(params.reference.data) print >> out, "Reading reference data file: %s" % params.reference.data tmp = iotbx.file_reader.any_file(params.reference.data, force_type="hkl", raise_sorry_if_errors=True) if params.reference.copy_test_flag: from yamtbx.dataproc.command_line import copy_free_R_flag if None in copy_free_R_flag.get_flag_array( tmp.file_server.miller_arrays, log_out=out): print >> out, " Warning: no test flag found in reference file (%s)" % params.reference.data else: test_flag_will_be_transferred = True print >> out, " test flag will be transferred" if space_group is not None: if space_group != tmp.file_server.miller_arrays[0].space_group(): print >> out, " ERROR! space_group=(%s) and that of reference.data (%s) do not match." % ( space_group.info(), tmp.file_server.miller_arrays[0].space_group_info()) return else: space_group = tmp.file_server.miller_arrays[0].space_group() print >> out, " space group for merging: %s" % space_group.info() if params.add_test_flag: if test_flag_will_be_transferred: print >> out, "Warning: add_test_flag=True was set, but the flag will be transferred from the reference file given." else: from cctbx import r_free_utils med_cell = numpy.median(cells.values(), axis=0) d_min = max( params.d_min - 0.2, 1.0 ) if params.d_min is not None else 1.5 # to prevent infinite set sg = space_group if not sg: sg = sgtbx.space_group_info( laues.values()[0].keys() [0]).group().build_derived_reflection_intensity_group(True) tmp = miller.build_set(crystal.symmetry(tuple(med_cell), space_group=sg), False, d_min=d_min, d_max=None) print >> out, "Generating test set using the reference symmetry:" crystal.symmetry.show_summary(tmp, out, " ") tmp = tmp.generate_r_free_flags(fraction=0.05, max_free=None, lattice_symmetry_max_delta=5.0, use_lattice_symmetry=True, n_shells=20) tmp.show_r_free_flags_info(out=out, prefix=" ") tmp = tmp.customized_copy( data=r_free_utils.export_r_free_flags_for_ccp4( flags=tmp.data(), test_flag_value=True)) mtz_object = tmp.as_mtz_dataset( column_root_label="FreeR_flag").mtz_object() test_flag_mtz = os.path.abspath( os.path.join(params.workdir, "test_flag.mtz")) mtz_object.write(file_name=test_flag_mtz) # Override the parameters params.reference.copy_test_flag = True params.reference.data = test_flag_mtz try: html_report.add_cells_and_files(cells, laues.keys()[0]) except: print >> out, traceback.format_exc() data_for_merge = [] if params.clustering == "blend": if params.blend.use_old_result is None: blend_wdir = os.path.join(params.workdir, "blend") os.mkdir(blend_wdir) blend.run_blend0R(blend_wdir, xds_ascii_files) print >> out, "\nRunning BLEND with analysis mode" else: blend_wdir = params.blend.use_old_result print >> out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min) summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat") clusters = blend_clusters.show_cluster_summary( out=open(summary_out, "w")) print >> out, "Clusters found by BLEND were summarized in %s" % summary_out if params.blend.min_cmpl is not None: clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters) if params.blend.min_acmpl is not None: clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters) if params.blend.min_redun is not None: clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters) if params.blend.min_aredun is not None: clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters) if params.blend.max_LCV is not None: clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters) if params.blend.max_aLCV is not None: clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters) if params.max_clusters is not None and len( clusters) > params.max_clusters: print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % ( params.max_clusters, len(clusters)) clusters = clusters[:params.max_clusters] if clusters: print >> out, "With specified conditions, following %d clusters will be merged:" % len( clusters) else: print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!" print >> out, "Please change criteria of completeness or redundancy" print >> out, "Here is the table of completeness and redundancy for each cluster:\n" print >> out, open(summary_out).read() for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % ( clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV) data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno), map(lambda x: blend_clusters.files[x - 1], IDs), LCV, aLCV, clh)) print >> out try: html_report.add_clutering_result(clusters, "blend") except: print >> out, traceback.format_exc() elif params.clustering == "cc": ccc_wdir = os.path.join(params.workdir, "cc_clustering") os.mkdir(ccc_wdir) cc_clusters = cc_clustering.CCClustering( ccc_wdir, xds_ascii_files, d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min, min_ios=params.cc_clustering.min_ios) print >> out, "\nRunning CC-based clustering" cc_clusters.do_clustering( nproc=params.cc_clustering.nproc, b_scale=params.cc_clustering.b_scale, use_normalized=params.cc_clustering.use_normalized, cluster_method=params.cc_clustering.method, distance_eqn=params.cc_clustering.cc_to_distance, min_common_refs=params.cc_clustering.min_common_refs, html_maker=html_report) summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat") clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w")) print >> out, "Clusters were summarized in %s" % summary_out if params.cc_clustering.min_cmpl is not None: clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters) if params.cc_clustering.min_acmpl is not None: clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters) if params.cc_clustering.min_redun is not None: clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters) if params.cc_clustering.min_aredun is not None: clusters = filter( lambda x: x[6] >= params.cc_clustering.min_aredun, clusters) if params.cc_clustering.max_clheight is not None: clusters = filter( lambda x: x[2] <= params.cc_clustering.max_clheight, clusters) if params.max_clusters is not None and len( clusters) > params.max_clusters: print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % ( params.max_clusters, len(clusters)) clusters = clusters[:params.max_clusters] if clusters: print >> out, "With specified conditions, following %d clusters will be merged:" % len( clusters) else: print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!" print >> out, "Please change criteria of completeness or redundancy" print >> out, "Here is the table of completeness and redundancy for each cluster:\n" print >> out, open(summary_out).read() for clno, IDs, clh, cmpl, redun, acmpl, aredun, ccmean, ccmin in clusters: # process largest first print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f CCmean=% .4f CCmin=% .4f" % ( clno, len(IDs), clh, cmpl, redun, acmpl, aredun, ccmean, ccmin) data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno), map(lambda x: xds_ascii_files[x - 1], IDs), float("nan"), float("nan"), clh)) print >> out try: html_report.add_clutering_result(clusters, "cc_clustering") except: print >> out, traceback.format_exc() else: data_for_merge.append((os.path.join(params.workdir, "all_data"), xds_ascii_files, float("nan"), float("nan"), 0)) ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w") ofs_summary.write( "# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan"))) ofs_summary.write("# LCV and aLCV are values of all data\n") ofs_summary.write( " cluster ClH LCV aLCV run ds.all ds.used Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso.bst Aniso.wst dmin.est\n" ) out.flush() def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats): tmps = "%12s %6.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %9.2f %9.2f %.2f\n" ofs_summary.write(tmps % ( os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle, len(xds_files), num_files, stats["cmpl"][0], stats["redundancy"][0], stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0], stats["cmpl"][2], stats["redundancy"][2], stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2], stats["cmpl"][1], stats["redundancy"][1], stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1], stats["sig_ano"][1], stats["cc_ano"][1], stats["xtriage_log"].wilson_b, #stats["xtriage_log"].anisotropy, stats["aniso"]["d_min_best"], stats["aniso"]["d_min_worst"], stats["dmin_est"], )) ofs_summary.flush() # write_ofs_summary() if "merging" in params.batch.par_run: params.nproc = params.batch.nproc_each jobs = [] for workdir, xds_files, LCV, aLCV, clh in data_for_merge: if not os.path.exists(workdir): os.makedirs(workdir) shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir) pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group), open(os.path.join(workdir, "args.pkl"), "w"), -1) job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each) job.write_script("""\ cd "%s" || exit 1 "%s" -c '\ import pickle; \ from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \ args = pickle.load(open("args.pkl")); \ ret = merge_datasets(*args); \ pickle.dump(ret, open("result.pkl","w")); \ ' """ % (os.path.abspath(workdir), sys.executable)) batchjobs.submit(job) jobs.append(job) batchjobs.wait_all(jobs) for workdir, xds_files, LCV, aLCV, clh in data_for_merge: try: results = pickle.load(open(os.path.join(workdir, "result.pkl"))) except: print >> out, "Error in unpickling result in %s" % workdir print >> out, traceback.format_exc() results = [] if len(results) == 0: ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir)) lcv, alcv = float("nan"), float("nan") for cycle, wd, num_files, stats in results: lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV) write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats) # Last lcv & alcv try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3]) except: print >> out, traceback.format_exc() else: for workdir, xds_files, LCV, aLCV, clh in data_for_merge: print >> out, "Merging %s..." % os.path.relpath( workdir, params.workdir) out.flush() results = merge_datasets(params, workdir, xds_files, cells, space_group) if len(results) == 0: ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir)) for cycle, wd, num_files, stats in results: lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV) write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats) try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3]) except: print >> out, traceback.format_exc() try: html_report.write_html() except: print >> out, traceback.format_exc() print "firefox %s" % os.path.join(html_report.root, "report.html") out.write("\nNormal exit at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")) out.write("Total wall-clock time: %.2f sec.\n" % (time.time() - time_started)) return
def xds_sequence(root, params): print print os.path.relpath(root, params.topdir) init_lp = os.path.join(root, "INIT.LP") xparm = os.path.join(root, "XPARM.XDS") gxparm = os.path.join(root, "GXPARM.XDS") defpix_lp = os.path.join(root, "DEFPIX.LP") correct_lp = os.path.join(root, "CORRECT.LP") integrate_hkl = os.path.join(root, "INTEGRATE.HKL") xac_hkl = os.path.join(root, "XDS_ASCII.HKL") integrate_lp = os.path.join(root, "INTEGRATE.LP") spot_xds = os.path.join(root, "SPOT.XDS") xdsinp = os.path.join(root, "XDS.INP") assert os.path.isfile(xdsinp) if params.cell_prior.force: assert params.cell_prior.check xdsinp_dict = dict(get_xdsinp_keyword(xdsinp)) if params.cell_prior.sgnum > 0: xs_prior = crystal.symmetry(params.cell_prior.cell, params.cell_prior.sgnum) else: xs_prior = None decilog = multi_out() decilog.register("log", open(os.path.join(root, "decision.log"), "a"), atexit_send_to=None) try: print >> decilog, "xds_sequence started at %s in %s\n" % ( time.strftime("%Y-%m-%d %H:%M:%S"), root) if not kamo_test_installation.tst_xds(): print >> decilog, "XDS is not installed or expired!!" return if params.show_progress: decilog.register("stdout", sys.stdout) if params.mode == "initial" and params.resume and os.path.isfile( correct_lp): print >> decilog, " Already processed." return if params.mode == "recycle" and not os.path.isfile(gxparm): print >> decilog, "GXPARM.XDS not found. Cannot do recycle." return if params.fast_delphi and (params.nproc is None or params.nproc > 1): delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc) print >> decilog, " Setting delphi to ", delphi modify_xdsinp(xdsinp, inp_params=[ ("DELPHI", str(delphi)), ]) if params.nproc is not None and params.nproc > 1: modify_xdsinp(xdsinp, inp_params=[ ("MAXIMUM_NUMBER_OF_PROCESSORS", str(params.nproc)), ]) if params.mode == "initial": modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT")]) run_xds(wdir=root, show_progress=params.show_progress) initlp = InitLp(init_lp) first_bad = initlp.check_bad_first_frames() if first_bad: print >> decilog, " first frames look bad (too weak) exposure:", first_bad new_data_range = map( int, dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split()) new_data_range[0] = first_bad[-1] + 1 print >> decilog, " changing DATA_RANGE= to", new_data_range modify_xdsinp(xdsinp, inp_params=[("JOB", "INIT"), ("DATA_RANGE", "%d %d" % tuple(new_data_range))]) for f in xds_files.generated_by_INIT: util.rotate_file(os.path.join(root, f), copy=False) run_xds(wdir=root, show_progress=params.show_progress) # Peak search modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")]) run_xds(wdir=root, show_progress=params.show_progress) if params.auto_frame_exclude_spot_based: sx = idxreflp.SpotXds(spot_xds) sx.set_xdsinp(xdsinp) spots = filter(lambda x: 5 < x[-1] < 30, sx.collected_spots()) # low-res (5 A) frame_numbers = numpy.array(map(lambda x: int(x[2]) + 1, spots)) data_range = map( int, dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split()) # XXX this assumes SPOT_RANGE equals to DATA_RANGE. Is this guaranteed? h = numpy.histogram(frame_numbers, bins=numpy.arange(data_range[0], data_range[1] + 2, step=1)) q14 = numpy.percentile(h[0], [25, 75]) iqr = q14[1] - q14[0] cutoff = max( h[0][h[0] <= iqr * 1.5 + q14[1]]) / 5 # magic number print >> decilog, "DEBUG:: IQR= %.2f, Q1/4= %s, cutoff= %.2f" % ( iqr, q14, cutoff) cut_frames = h[1][h[0] < cutoff] keep_frames = h[1][h[0] >= cutoff] print >> decilog, "DEBUG:: keep_frames=", keep_frames print >> decilog, "DEBUG:: cut_frames=", cut_frames if len(cut_frames) > 0: cut_ranges = [ [cut_frames[0], cut_frames[0]], ] for fn in cut_frames: if fn - cut_ranges[-1][1] <= 1: cut_ranges[-1][1] = fn else: cut_ranges.append([fn, fn]) # Edit XDS.INP cut_inp_str = "".join( map( lambda x: "EXCLUDE_DATA_RANGE= %6d %6d\n" % tuple( x), cut_ranges)) open(xdsinp, "a").write("\n" + cut_inp_str) # Edit SPOT.XDS shutil.copyfile(spot_xds, spot_xds + ".org") sx.write(open(spot_xds, "w"), frame_selection=set(keep_frames)) # Indexing if params.cell_prior.method == "use_first": modify_xdsinp(xdsinp, inp_params=[ ("JOB", "IDXREF"), ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, params.cell_prior.cell))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum), ]) else: modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF")]) run_xds(wdir=root, show_progress=params.show_progress) print >> decilog, "" # TODO indexing stats like indexed percentage here. if params.tryhard: try_indexing_hard(root, params.show_progress, decilog, known_sgnum=params.cell_prior.sgnum, known_cell=params.cell_prior.cell, tol_length=params.cell_prior.tol_length, tol_angle=params.cell_prior.tol_angle) if not os.path.isfile(xparm): print >> decilog, " Indexing failed." return if params.cell_prior.sgnum > 0: # Check anyway xsxds = XPARM(xparm).crystal_symmetry() cosets = reindex.reindexing_operators( xs_prior, xsxds, params.cell_prior.tol_length, params.cell_prior.tol_angle) if cosets.double_cosets is None: if params.cell_prior.check: print >> decilog, " Incompatible cell. Indexing failed." return else: print >> decilog, " Warning: Incompatible cell." elif params.cell_prior.method == "symm_constraint_only": cell = xsxds.unit_cell().change_basis( cosets.combined_cb_ops()[0]) print >> decilog, " Trying symmetry-constrained cell parameter:", cell modify_xdsinp(xdsinp, inp_params=[ ("JOB", "IDXREF"), ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, cell.parameters()))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum), ]) for f in xds_files.generated_by_IDXREF: util.rotate_file(os.path.join(root, f), copy=(f == "SPOT.XDS")) run_xds(wdir=root, show_progress=params.show_progress) if not os.path.isfile(xparm): print >> decilog, " Indexing failed." return # Check again xsxds = XPARM(xparm).crystal_symmetry() if not xsxds.unit_cell().is_similar_to( xs_prior.unit_cell(), params.cell_prior.tol_length, params.cell_prior.tol_angle): print >> decilog, " Resulted in different cell. Indexing failed." return elif params.mode == "recycle": print >> decilog, " Start recycle. original ISa= %.2f" % correctlp.get_ISa( correct_lp, check_valid=True) for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS", "plot_integrate.log"): util.rotate_file(os.path.join(root, f), copy=True) shutil.copyfile(gxparm + ".1", xparm) else: raise "Unknown mode (%s)" % params.mode # To Integration modify_xdsinp(xdsinp, inp_params=[("JOB", "DEFPIX INTEGRATE"), ("INCLUDE_RESOLUTION_RANGE", "50 0")]) run_xds(wdir=root, show_progress=params.show_progress) if os.path.isfile(integrate_lp): xds_plot_integrate.run(integrate_lp, os.path.join(root, "plot_integrate.log")) if not os.path.isfile(integrate_hkl): print >> decilog, " Integration failed." return # Make _noscale.HKL if needed if params.no_scaling: bk_prefix = make_backup(("XDS.INP", ), wdir=root, quiet=True) xparm_obj = XPARM(xparm) modify_xdsinp(xdsinp, inp_params=[ ("JOB", "CORRECT"), ("CORRECTIONS", ""), ("NBATCH", "1"), ("MINIMUM_I/SIGMA", "50"), ("REFINE(CORRECT)", ""), ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, xparm_obj.unit_cell))), ("SPACE_GROUP_NUMBER", "%d" % xparm_obj.spacegroup), ]) print >> decilog, " running CORRECT without empirical scaling" run_xds(wdir=root, show_progress=params.show_progress) for f in xds_files.generated_by_CORRECT + ("XDS.INP", ): ff = os.path.join(root, f) if not os.path.isfile(ff): continue if ff.endswith(".cbf"): os.remove(ff) else: os.rename(ff, ff + "_noscale") revert_files(("XDS.INP", ), bk_prefix, wdir=root, quiet=True) # Run pointless pointless_integrate = {} if params.use_pointless: worker = Pointless() pointless_integrate = worker.run_for_symm( xdsin=integrate_hkl, logout=os.path.join(root, "pointless_integrate.log")) if "symm" in pointless_integrate: symm = pointless_integrate["symm"] print >> decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info( ) if xs_prior: if xtal.is_same_space_group_ignoring_enantiomorph( symm.space_group(), xs_prior.space_group()): print >> decilog, " which is consistent with given symmetry." elif xtal.is_same_laue_symmetry(symm.space_group(), xs_prior.space_group()): print >> decilog, " which has consistent Laue symmetry with given symmetry." else: print >> decilog, " which is inconsistent with given symmetry." sgnum = symm.space_group_info().type().number() cell = " ".join( map(lambda x: "%.2f" % x, symm.unit_cell().parameters())) modify_xdsinp(xdsinp, inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum), ("UNIT_CELL_CONSTANTS", cell)]) else: print >> decilog, " pointless failed." flag_do_not_change_symm = False if xs_prior and params.cell_prior.force: modify_xdsinp(xdsinp, inp_params=[("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, params.cell_prior.cell))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum)]) flag_do_not_change_symm = True elif params.cell_prior.method == "correct_only": xsxds = XPARM(xparm).crystal_symmetry() cosets = reindex.reindexing_operators(xs_prior, xsxds, params.cell_prior.tol_length, params.cell_prior.tol_angle) if cosets.double_cosets is not None: cell = xsxds.unit_cell().change_basis( cosets.combined_cb_ops()[0]) print >> decilog, " Using given symmetry in CORRECT with symmetry constraints:", cell modify_xdsinp(xdsinp, inp_params=[ ("UNIT_CELL_CONSTANTS", " ".join( map(lambda x: "%.3f" % x, cell.parameters()))), ("SPACE_GROUP_NUMBER", "%d" % params.cell_prior.sgnum), ]) flag_do_not_change_symm = True else: print >> decilog, " Tried to use given symmetry in CORRECT, but cell in integration is incompatible." # Do Scaling modify_xdsinp(xdsinp, inp_params=[ ("JOB", "CORRECT"), ]) run_xds(wdir=root, show_progress=params.show_progress) if not os.path.isfile(xac_hkl): print >> decilog, " CORRECT failed." return if not os.path.isfile(gxparm): print >> decilog, " Refinement in CORRECT failed." print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa( correct_lp, check_valid=True) ret = calc_merging_stats(xac_hkl) if params.cut_resolution: if ret is not None and ret[0] is not None: d_min = ret[0] modify_xdsinp(xdsinp, inp_params=[("JOB", "CORRECT"), ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min)]) print >> decilog, " Re-scale at %.2f A" % d_min os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP")) os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL")) run_xds(wdir=root, show_progress=params.show_progress) print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa( correct_lp, check_valid=True) print >> decilog, " (Original files are saved as *_fullres.*)" else: print >> decilog, "error: Can't decide resolution." last_ISa = correctlp.get_ISa(correct_lp, check_valid=True) # Run pointless and (if result is different from INTEGRATE) re-scale. if params.use_pointless: worker = Pointless() pointless_correct = worker.run_for_symm( xdsin=xac_hkl, logout=os.path.join(root, "pointless_correct.log")) pointless_best_symm = None if "symm" in pointless_correct: symm = pointless_correct["symm"] need_rescale = False if pointless_integrate.get("symm"): symm_by_integrate = pointless_integrate["symm"] if not xtal.is_same_laue_symmetry( symm_by_integrate.space_group(), symm.space_group()): print >> decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % ( symm.space_group_info(), symm_by_integrate.space_group_info()) prob_integrate = pointless_integrate.get( "laue_prob", float("nan")) prob_correct = pointless_correct.get( "laue_prob", float("nan")) print >> decilog, " Prob(%s |INTEGRATE), Prob(%s |CORRECT) = %.4f, %.4f." % ( symm_by_integrate.space_group_info(), symm.space_group_info(), prob_integrate, prob_correct) if prob_correct > prob_integrate: need_rescale = True pointless_best_symm = symm else: pointless_best_symm = symm_by_integrate else: need_rescale = True pointless_best_symm = symm print >> decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info( ) if xs_prior: if xtal.is_same_space_group_ignoring_enantiomorph( symm.space_group(), xs_prior.space_group()): print >> decilog, " which is consistent with given symmetry." elif xtal.is_same_laue_symmetry( symm.space_group(), xs_prior.space_group()): print >> decilog, " which has consistent Laue symmetry with given symmetry." else: print >> decilog, " which is inconsistent with given symmetry." if need_rescale and not flag_do_not_change_symm: sgnum = symm.space_group_info().type().number() cell = " ".join( map(lambda x: "%.2f" % x, symm.unit_cell().parameters())) modify_xdsinp(xdsinp, inp_params=[ ("JOB", "CORRECT"), ("SPACE_GROUP_NUMBER", "%d" % sgnum), ("UNIT_CELL_CONSTANTS", cell), ("INCLUDE_RESOLUTION_RANGE", "50 0") ]) run_xds(wdir=root, show_progress=params.show_progress) ret = calc_merging_stats(xac_hkl) if params.cut_resolution: if ret is not None and ret[0] is not None: d_min = ret[0] modify_xdsinp(xdsinp, inp_params=[ ("JOB", "CORRECT"), ("INCLUDE_RESOLUTION_RANGE", "50 %.2f" % d_min) ]) print >> decilog, " Re-scale at %.2f A" % d_min os.rename(os.path.join(root, "CORRECT.LP"), os.path.join(root, "CORRECT_fullres.LP")) os.rename( xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL")) run_xds(wdir=root, show_progress=params.show_progress) print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa( correct_lp, check_valid=True) print >> decilog, " (Original files are saved as *_fullres.*)" else: print >> decilog, "error: Can't decide resolution." for f in ("CORRECT_fullres.LP", "XDS_ASCII_fullres.HKL"): if os.path.isfile(os.path.join(root, f)): print >> decilog, "removing", f os.remove(os.path.join(root, f)) ISa = correctlp.get_ISa(correct_lp, check_valid=True) if ISa >= last_ISa or last_ISa != last_ISa: # if improved or last_ISa is nan print >> decilog, "ISa improved= %.2f" % ISa else: print >> decilog, "ISa got worse= %.2f" % ISa if pointless_best_symm: xac_symm = XDS_ASCII(xac_hkl, read_data=False).symm if not xtal.is_same_space_group_ignoring_enantiomorph( xac_symm.space_group(), pointless_best_symm.space_group()): if xtal.is_same_laue_symmetry( xac_symm.space_group(), pointless_best_symm.space_group()): tmp = "same Laue symmetry" else: tmp = "different Laue symmetry" print >> decilog, "WARNING: symmetry in scaling is different from Pointless result (%s)." % tmp run_xdsstat(wdir=root) print if params.make_report: html_report.make_individual_report(root, root) except: print >> decilog, traceback.format_exc() finally: print >> decilog, "\nxds_sequence finished at %s" % time.strftime( "%Y-%m-%d %H:%M:%S") decilog.close()
def run(params): if os.path.isdir(params.workdir) and os.listdir(params.workdir): print "Directory already exists and not empty:", params.workdir return if params.reference_file is not None and params.program != "xscale": print "WARNING - reference file is not used unless program=xscale." if not os.path.isdir(params.workdir): os.makedirs(params.workdir) if params.batch.engine == "sge": batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name) elif params.batch.engine == "sh": batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs) else: raise "Unknown batch engine: %s" % params.batch.engine out = multi_out() out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None) out.register("stdout", sys.stdout) print >>out, "Paramters:" libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ") print >>out, "" # XXX Not works when clustering is used.. html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir)) try: html_report.add_params(params, master_params_str) except: print >>out, traceback.format_exc() xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin)) xds_ascii_files = filter(lambda x: x!="" and os.path.isfile(x), xds_ascii_files) xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files) cells = collections.OrderedDict() laues = {} # for check for xac in xds_ascii_files: try: symm = XDS_ASCII(xac, read_data=False).symm except: try: symm = any_reflection_file(xac).as_miller_arrays()[0].crystal_symmetry() except: print >>out, "Error in reading %s" % xac print >>out, traceback.format_exc() return cells[xac] = symm.unit_cell().parameters() laue = symm.space_group().build_derived_reflection_intensity_group(False).info() laues.setdefault(str(laue),{}).setdefault(symm.space_group_info().type().number(), []).append(xac) if len(laues) > 1: print >>out, "ERROR! more than one space group included." for laue in laues: print "Laue symmetry", laue for sg in laues[laue]: print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg])) for f in laues[laue][sg]: print >>out, " %s" % f print >>out, "" return space_group = None if params.space_group is not None: space_group = sgtbx.space_group_info(params.space_group).group() laue_given = str(space_group.build_derived_reflection_intensity_group(False).info()) if laue_given != laues.keys()[0]: print >>out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (params.space_group, laues.keys()[0]) return else: tmp = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True) print >>out, "Space group for merging:", tmp.info() try: html_report.add_cells_and_files(cells, laues.keys()[0]) except: print >>out, traceback.format_exc() data_for_merge = [] if params.clustering == "blend": if params.blend.use_old_result is None: blend_wdir = os.path.join(params.workdir, "blend") os.mkdir(blend_wdir) blend.run_blend0R(blend_wdir, xds_ascii_files) print >>out, "\nRunning BLEND with analysis mode" else: blend_wdir = params.blend.use_old_result print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min) summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat") clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w")) print >>out, "Clusters found by BLEND were summarized in %s" % summary_out if params.blend.min_cmpl is not None: clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters) if params.blend.min_acmpl is not None: clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters) if params.blend.min_redun is not None: clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters) if params.blend.min_aredun is not None: clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters) if params.blend.max_LCV is not None: clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters) if params.blend.max_aLCV is not None: clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters) if params.max_clusters is not None and len(clusters) > params.max_clusters: print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters)) clusters = clusters[:params.max_clusters] print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters) for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV) data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno), map(lambda x: blend_clusters.files[x-1], IDs), LCV, aLCV,clh)) print >>out try: html_report.add_clutering_result(clusters, "blend") except: print >>out, traceback.format_exc() elif params.clustering == "cc": ccc_wdir = os.path.join(params.workdir, "cc_clustering") os.mkdir(ccc_wdir) cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files, d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min, min_ios=params.cc_clustering.min_ios) print >>out, "\nRunning CC-based clustering" cc_clusters.do_clustering(nproc=params.cc_clustering.nproc, b_scale=params.cc_clustering.b_scale, use_normalized=params.cc_clustering.use_normalized, html_maker=html_report) summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat") clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w")) print >>out, "Clusters were summarized in %s" % summary_out if params.cc_clustering.min_cmpl is not None: clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters) if params.cc_clustering.min_acmpl is not None: clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters) if params.cc_clustering.min_redun is not None: clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters) if params.cc_clustering.min_aredun is not None: clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters) if params.cc_clustering.max_clheight is not None: clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters) if params.max_clusters is not None and len(clusters) > params.max_clusters: print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters)) clusters = clusters[:params.max_clusters] print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters) for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun) data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno), map(lambda x: xds_ascii_files[x-1], IDs), float("nan"),float("nan"),clh)) print >>out try: html_report.add_clutering_result(clusters, "cc_clustering") except: print >>out, traceback.format_exc() else: data_for_merge.append((os.path.join(params.workdir, "all_data"), xds_ascii_files, float("nan"), float("nan"), 0)) ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w") ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan"))) ofs_summary.write("# LCV and aLCV are values of all data\n") ofs_summary.write(" cluster ClH LCV aLCV run ds.all ds.used Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso \n") out.flush() def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats): tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n" ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle, len(xds_files), num_files, stats["cmpl"][0], stats["redundancy"][0], stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0], stats["cmpl"][2], stats["redundancy"][2], stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2], stats["cmpl"][1], stats["redundancy"][1], stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1], stats["sig_ano"][1], stats["cc_ano"][1], stats["xtriage_log"].wilson_b, stats["xtriage_log"].anisotropy, )) ofs_summary.flush() # write_ofs_summary() if "merging" in params.batch.par_run: params.nproc = params.batch.nproc_each jobs = [] for workdir, xds_files, LCV, aLCV, clh in data_for_merge: if not os.path.exists(workdir): os.makedirs(workdir) shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir) pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group, batchjobs), open(os.path.join(workdir, "args.pkl"), "w"), -1) job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each) job.write_script("""\ cd "%s" || exit 1 "%s" -c '\ import pickle; \ from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \ args = pickle.load(open("args.pkl")); \ ofs = open("result.pkl","w"); \ ret = merge_datasets(*args); \ pickle.dump(ret, ofs); \ ' """ % (os.path.abspath(workdir), sys.executable)) batchjobs.submit(job) jobs.append(job) batchjobs.wait_all(jobs) for workdir, xds_files, LCV, aLCV, clh in data_for_merge: try: results = pickle.load(open(os.path.join(workdir, "result.pkl"))) except: print >>out, "Error in unpickling result in %s" % workdir print >>out, traceback.format_exc() results = [] if len(results) == 0: ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir)) lcv, alcv = float("nan"), float("nan") for cycle, wd, num_files, stats in results: lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV) write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats) # Last lcv & alcv try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3]) except: print >>out, traceback.format_exc() else: for workdir, xds_files, LCV, aLCV, clh in data_for_merge: print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir) out.flush() results = merge_datasets(params, workdir, xds_files, cells, space_group, batchjobs) if len(results) == 0: ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir)) for cycle, wd, num_files, stats in results: lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV) write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats) try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3]) except: print >>out, traceback.format_exc() try: html_report.write_html() except: print >>out, traceback.format_exc() print "firefox %s" % os.path.join(html_report.root, "report.html") return
def run(params): if os.path.isdir(params.workdir) and os.listdir(params.workdir): print "Directory already exists and not empty:", params.workdir return if params.reference_file is not None and params.program != "xscale": print "WARNING - reference file is not used unless program=xscale." if not os.path.isdir(params.workdir): os.makedirs(params.workdir) if params.batch.engine == "sge": batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name) elif params.batch.engine == "sh": batchjobs = batchjob.ExecLocal() else: raise "Unknown batch engine: %s" % params.batch.engine out = multi_out() out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None) out.register("stdout", sys.stdout) print >>out, "Paramters:" libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ") print >>out, "" # XXX Not works when clustering is used.. html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir)) try: html_report.add_params(params, master_params_str) except: print >>out, traceback.format_exc() xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin)) xds_ascii_files = filter(lambda x: x!="" and os.path.isfile(x), xds_ascii_files) xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files) cells = collections.OrderedDict() laues = {} # for check for xac in xds_ascii_files: try: symm = XDS_ASCII(xac, read_data=False).symm except: print >>out, "Error in reading %s" % xac print >>out, traceback.format_exc() return cells[xac] = symm.unit_cell().parameters() laue = symm.space_group().build_derived_reflection_intensity_group(False).info() laues.setdefault(str(laue),{}).setdefault(symm.space_group_info().type().number(), []).append(xac) if len(laues) > 1: print >>out, "ERROR! more than one space group included." for laue in laues: print "Laue symmetry", laue for sg in laues[laue]: print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg])) for f in laues[laue][sg]: print >>out, " %s" % f print >>out, "" return try: html_report.add_cells_and_files(cells, laues.keys()[0]) except: print >>out, traceback.format_exc() data_for_merge = [] if params.clustering == "blend": if params.blend.use_old_result is None: blend_wdir = os.path.join(params.workdir, "blend") os.mkdir(blend_wdir) blend.run_blend0R(blend_wdir, xds_ascii_files) print >>out, "\nRunning BLEND with analysis mode" else: blend_wdir = params.blend.use_old_result print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min) summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat") clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w")) print >>out, "Clusters found by BLEND were summarized in %s" % summary_out if params.blend.min_cmpl is not None: clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters) if params.blend.min_acmpl is not None: clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters) if params.blend.min_redun is not None: clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters) if params.blend.min_aredun is not None: clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters) if params.blend.max_LCV is not None: clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters) if params.blend.max_aLCV is not None: clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters) print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters) for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV) data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno), map(lambda x: blend_clusters.files[x-1], IDs), LCV, aLCV,clh)) print >>out try: html_report.add_clutering_result(clusters, "blend") except: print >>out, traceback.format_exc() elif params.clustering == "cc": ccc_wdir = os.path.join(params.workdir, "cc_clustering") os.mkdir(ccc_wdir) cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files, d_min=params.cc_clustering.d_min, min_ios=params.cc_clustering.min_ios) print >>out, "\nRunning CC-based clustering" cc_clusters.do_clustering(nproc=params.cc_clustering.nproc, b_scale=params.cc_clustering.b_scale, use_normalized=params.cc_clustering.use_normalized, html_maker=html_report) summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat") clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w")) print >>out, "Clusters were summarized in %s" % summary_out if params.cc_clustering.min_cmpl is not None: clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters) if params.cc_clustering.min_acmpl is not None: clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters) if params.cc_clustering.min_redun is not None: clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters) if params.cc_clustering.min_aredun is not None: clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters) if params.cc_clustering.max_clheight is not None: clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters) print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters) for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun) data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno), map(lambda x: xds_ascii_files[x-1], IDs), float("nan"),float("nan"),clh)) print >>out try: html_report.add_clutering_result(clusters, "cc_clustering") except: print >>out, traceback.format_exc() else: data_for_merge.append((os.path.join(params.workdir, "all_data"), xds_ascii_files, float("nan"), float("nan"), 0)) ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w") ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan"))) ofs_summary.write("# LCV and aLCV are values of all data\n") ofs_summary.write(" cluster ClH LCV aLCV run ds.all ds.used Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso \n") out.flush() def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats): tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n" ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle, len(xds_files), num_files, stats["cmpl"][0], stats["redundancy"][0], stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0], stats["cmpl"][2], stats["redundancy"][2], stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2], stats["cmpl"][1], stats["redundancy"][1], stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1], stats["sig_ano"][1], stats["cc_ano"][1], stats["xtriage_log"].wilson_b, stats["xtriage_log"].anisotropy, )) ofs_summary.flush() # write_ofs_summary() if "merging" in params.batch.par_run: params.nproc = params.batch.nproc_each jobs = [] for workdir, xds_files, LCV, aLCV, clh in data_for_merge: if not os.path.exists(workdir): os.makedirs(workdir) shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir) pickle.dump((params, os.path.abspath(workdir), xds_files, cells, batchjobs), open(os.path.join(workdir, "args.pkl"), "w"), -1) job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each) job.write_script("""\ "%s" -c '\ import pickle; \ from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \ args = pickle.load(open("args.pkl")); \ ret = merge_datasets(*args); \ pickle.dump(ret, open("result.pkl","w")); \ ' """ % sys.executable) batchjobs.submit(job) jobs.append(job) batchjobs.wait_all(jobs) for workdir, xds_files, LCV, aLCV, clh in data_for_merge: try: results = pickle.load(open(os.path.join(workdir, "result.pkl"))) except: print >>out, "Error in unpickling result in %s" % workdir print >>out, traceback.format_exc() results = [] if len(results) == 0: ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir)) for cycle, wd, num_files, stats in results: write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats) try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3]) except: print >>out, traceback.format_exc() else: for workdir, xds_files, LCV, aLCV, clh in data_for_merge: print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir) out.flush() results = merge_datasets(params, workdir, xds_files, cells, batchjobs) if len(results) == 0: ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir)) for cycle, wd, num_files, stats in results: write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats) try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3]) except: print >>out, traceback.format_exc() try: html_report.write_html() except: print >>out, traceback.format_exc() print "firefox %s" % os.path.join(html_report.root, "report.html") return