def run(params):
    if params.datout is None:
        params.datout = os.path.basename(params.lstin) + ".dat"

    xac_files = read_path_list(params.lstin)
    ofs_dat = open(params.datout, "w")

    ref_v6cell = None
    if params.ref_cell is not None:
        ref_v6cell = v6cell(uctbx.unit_cell(params.ref_cell).niggli_cell())
        ofs_dat.write("# ref_cell= %s\n" % params.ref_cell)

    if params.n_residues is not None:
        ofs_dat.write("# n_residues= %d\n" % params.n_residues)

    ofs_dat.write("file ioversigma resnatsnr1 wilsonb abdist a b c al be ga\n")

    ret = easy_mp.pool_map(fixed_func=lambda x: calc_stats(x, params.stats, params.n_residues,
                                                           ref_v6cell, params.min_peak,
                                                           params.min_peak_percentile,
                                                           params.correct_peak),
                           args=xac_files,
                           processes=params.nproc)

    for stat in ret:
        getornan = lambda x: stat.get(x, float("nan"))  # get or nan
        ofs_dat.write("%s %.3f %.3f %.3f %.3e" % (stat["filename"],
                                                  getornan("ioversigma"), getornan("resnatsnr1"),
                                                  getornan("wilsonb"), getornan("abdist")))
        ofs_dat.write(" %.3f %.3f %.3f %.2f %.2f %.2f\n" % stat["cell"])

    ofs_dat.close()
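# A minimal follow-up sketch (not part of the original tool) showing how the
# .dat table written by run() above could be loaded for filtering datasets.
# The file name "formerge.lst.dat" and the 3.0 cutoff are made-up examples;
# numpy.genfromtxt skips the "#" comment lines and reads the column names
# from the header line.
import numpy

tab = numpy.genfromtxt("formerge.lst.dat", names=True, dtype=None)
good = tab[tab["ioversigma"] > 3.0]  # e.g. keep only datasets with strong signal
for row in good:
    print row["file"], row["ioversigma"]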
def read_dials_phil(phil_in, dials_phil_selection_lst):
    master_str = """\
input {
  experiments = None
    .type = path
    .multiple = true
  reflections = None
    .type = path
    .multiple = true
}
"""
    params = iotbx.phil.process_command_line(args=[phil_in],
                                             master_string=master_str).work.extract()
    assert len(params.input.experiments) == len(params.input.reflections)
    assert all(map(lambda x: os.path.isfile(x), params.input.experiments))
    assert all(map(lambda x: os.path.isfile(x), params.input.reflections))

    ret = zip(params.input.experiments, params.input.reflections)

    if dials_phil_selection_lst:
        selected_files = set(read_path_list(dials_phil_selection_lst))
        ret = filter(lambda x: x[1] in selected_files, ret)

    return ret
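# Usage sketch (not from the original source; all file names here are
# hypothetical): read_dials_phil() pairs DIALS experiment/reflection files
# listed in a PHIL file and asserts that each file exists, so the dummy files
# are created first only to let the assertions pass in this illustration.
open("input_example.phil", "w").write("""\
input {
  experiments = run1_integrated.expt
  reflections = run1_integrated.refl
}
""")
for f in ("run1_integrated.expt", "run1_integrated.refl"):
    open(f, "w").close()  # empty placeholders; real DIALS files would go here

pairs = read_dials_phil("input_example.phil", None)  # None = no selection list
for expt, refl in pairs:
    print expt, refl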
def run(opts, files):
    if len(files) == 1 and files[0].endswith(".lst"):
        files = read_path_list(files[0])

    make_geom(files[0], os.path.basename(files[0]) + ".geom")
    easy_mp.pool_map(fixed_func=run_each, args=files, processes=opts.nproc)
def run(params, target_files):
    assert params.normalization in ("no", "E")
    ofs = open(params.dat_out, "w")

    xac_files = util.read_path_list(params.lstin)
    targets = read_target_files(target_files, params.d_min, params.d_max,
                                params.normalization, ofs)
    cellcon = CellConstraints(targets.values()[0].space_group())

    #for i, t in enumerate(targets): ofs.write("# target%.3d = %s\n" % (i,t))
    ofs.write("# normalization = %s\n" % params.normalization)
    ofs.write("# d_min, d_max = %s, %s\n" % (params.d_min, params.d_max))
    ofs.write("file %s " % cellcon.get_label_for_free_params())
    ofs.write(" ".join(map(lambda x: "cc.%.3d nref.%.3d" % (x, x),
                           xrange(len(targets)))))
    ofs.write("\n")

    for xac_file in xac_files:
        print "reading", xac_file
        xac = xds_ascii.XDS_ASCII(xac_file)
        xac.remove_rejected()
        iobs = xac.i_obs(anomalous_flag=False).merge_equivalents(use_internal_variance=False).array()
        ofs.write("%s %s" % (xac_file, cellcon.format_free_params(iobs.unit_cell())))

        fail_flag = False
        if params.normalization == "E":
            try:
                normaliser = kernel_normalisation(iobs, auto_kernel=True)
                iobs = iobs.customized_copy(data=iobs.data()/normaliser.normalizer_for_miller_array,
                                            sigmas=iobs.sigmas()/normaliser.normalizer_for_miller_array)
            except Exception:
                fail_flag = True

        for i, ta in enumerate(targets.values()):
            if fail_flag:
                # Normalization failed; originally this wrote the undefined
                # cc_num (a NameError). Write placeholders instead.
                ofs.write(" % .4f %4d" % (float("nan"), 0))
            else:
                cc_num = calc_cc(iobs, ta)
                ofs.write(" % .4f %4d" % cc_num)

        ofs.write("\n")
def run(params, args):
    ref_xs = None
    if None not in (params.ref_cell, params.ref_symm):
        ref_xs = crystal.symmetry(params.ref_cell, params.ref_symm)

    if len(args) == 1 and args[0].endswith(".lst"):
        args = read_path_list(args[0])

    if len(args) == 0:
        return

    angles = []
    for arg in args:
        if xds_ascii.is_xds_ascii(arg):
            angles.extend(from_xds_ascii(arg, ref_xs))
        else:
            angles.extend(from_xparm(arg, ref_xs))

    if params.dat_out:
        make_dat(angles, params.dat_out)
    if params.plot_out:
        make_plot(angles, params.plot_out)
def run(params):
    ref_xs = None
    if None not in (params.ref_cell, params.ref_symm):
        ref_xs = crystal.symmetry(params.ref_cell, params.ref_symm)

    if len(params.input) == 1 and params.input[0].endswith(".lst"):
        params.input = read_path_list(params.input[0])

    if len(params.input) == 0:
        return

    angles = []
    for arg in params.input:
        if ".stream" in arg:
            angles.extend(from_crystfel_stream(arg, ref_xs))
        elif xds_ascii.is_xds_ascii(arg):
            angles.extend(from_xds_ascii(arg, ref_xs))
        else:
            angles.extend(from_xparm(arg, ref_xs))

    if params.dat_out:
        make_dat(angles, params.dat_out)
    if params.plot_out:
        make_plot(angles, params.plot_out)
def run(lstin, params):
    xac_files = read_path_list(lstin)
    common0 = len(os.path.commonprefix(xac_files))

    arrays = []
    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=params.d_min, d_max=params.d_max)
        a = a.merge_equivalents(use_internal_variance=False).array()
        a = a.select(a.data()/a.sigmas() >= params.min_ios)
        arrays.append(a)

    # Prep
    args = []
    for i in xrange(len(arrays)-1):
        for j in xrange(i+1, len(arrays)):
            args.append((i, j))

    # Calc all CC
    worker = lambda x: calc_cc(arrays[x[0]], arrays[x[1]])
    results = easy_mp.pool_map(fixed_func=worker,
                               args=args,
                               processes=params.nproc)

    # Make matrix (only the lower triangle is filled)
    mat = numpy.zeros(shape=(len(arrays), len(arrays)))
    for (i, j), (cc, nref) in zip(args, results):
        print j, i, cc
        mat[j, i] = cc

    open("%s.names" % params.prefix, "w").write("\n".join(map(lambda x: os.path.dirname(x[common0:]), xac_files)))
    open("%s.matrix" % params.prefix, "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

    ofs = open("%s.dat" % params.prefix, "w")
    ofs.write("i j cc nref\n")
    for (i, j), (cc, nref) in zip(args, results):
        ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

    open("%s_ana.R" % params.prefix, "w").write("""\
treeToList2 <- function(htree) { # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree) {
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%s.matrix")
md<-matrix(1-cc, ncol=%d, byrow=TRUE)
labs<-read.table("%s.names")
filenames<-read.table("%s")$V1
rownames(md)<-labs$V1
hc <- hclust(as.dist(md), method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()

hc$labels <- 1:nrow(md)
groups <- treeToList2(hc)
cat("ClNumber Nds Clheight\\n", file="./CLUSTERS.txt")
for (i in 1:length(groups)) {
 sorted_groups <- sort(groups[[i]])
 linea <- paste(sprintf(" %%03d %%3d %%7.3f\\n", i, length(groups[[i]]), hc$height[i]), sep="")
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
 write.table(filenames[sorted_groups], sprintf("cluster%%.3d.lst", i),
             quote=FALSE, row.names=FALSE, col.names=FALSE)
}
q(save="yes")
""" % (params.prefix, len(arrays), params.prefix, lstin))

    print "R --vanilla < %s_ana.R" % params.prefix
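# Alternative sketch (not part of the original tool): roughly the same
# Ward-linkage clustering on the 1-CC distance matrix can be done directly in
# Python with scipy instead of the generated R script. The prefix "cc" below
# is a hypothetical value of params.prefix; note that R's pre-2014 "ward"
# method is not exactly scipy's Ward criterion.
import numpy
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

cc = numpy.loadtxt("cc.matrix")     # flat row-major matrix written by run()
n = int(numpy.sqrt(cc.size))
ccm = cc.reshape(n, n)
ccm = ccm + ccm.T                   # run() fills only the lower triangle
dmat = 1.0 - ccm
numpy.fill_diagonal(dmat, 0.0)      # self-distance must be zero for squareform

z = hierarchy.linkage(squareform(dmat), method="ward")
labels = hierarchy.fcluster(z, t=0.5, criterion="distance")  # cut at height 0.5
print labels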
def run(params):
    if not params.workdir:
        print "Give workdir="
        return
    if os.path.exists(params.workdir):
        print "workdir already exists:", params.workdir
        return

    params.workdir = os.path.abspath(params.workdir)

    if None not in (params.unit_cell, params.space_group):
        user_xs = crystal.symmetry(params.unit_cell, params.space_group)
    else:
        user_xs = None

    from yamtbx.dataproc.auto.command_line.multi_check_cell_consistency import CellGraph

    cm = CellGraph(tol_length=params.cell_grouping.tol_length,
                   tol_angle=params.cell_grouping.tol_angle)

    if len(params.xdsdir) == 1 and os.path.isfile(params.xdsdir[0]):
        params.xdsdir = util.read_path_list(params.xdsdir[0])

    xds_dirs = []
    for xd in params.xdsdir:
        xds_dirs.extend(map(lambda x: x[0],
                            filter(lambda x: any(map(lambda y: y.startswith("XDS_ASCII.HKL"), x[2])) or "DIALS.HKL" in x[2],
                                   os.walk(os.path.abspath(xd)))))

    for i, xd in enumerate(xds_dirs):
        cm.add_proc_result(i, xd)

    pm = PrepMerging(cm)
    print pm.find_groups()

    if len(cm.groups) == 0:
        print "Oh, no. No data."
        return

    if params.group_choice is None:
        while True:
            try:
                val = int(raw_input("Input group number [%d..%d]: " % (1, len(cm.groups))))
                if not 0 < val <= len(cm.groups):
                    raise ValueError
                params.group_choice = val
                break
            except ValueError:
                continue

    symms = cm.get_selectable_symms(params.group_choice - 1)
    symmidx = -1

    if user_xs:
        #for xs in cm.get_selectable_symms(params.group_choice):
        raise RuntimeError("Not supported now.")  # was an invalid string exception

    while True:
        try:
            val = int(raw_input("Input symmetry number [%d..%d]: " % (0, len(symms) - 1)))
            if not 0 <= val < len(symms):
                raise ValueError
            symmidx = val
            break
        except ValueError:
            continue

    os.mkdir(params.workdir)
    topdir = os.path.dirname(os.path.commonprefix(xds_dirs))

    pm.prep_merging(group=params.group_choice, symmidx=symmidx,
                    workdir=params.workdir, topdir=topdir,
                    cell_method=params.cell_method, nproc=params.nproc,
                    prep_dials_files=params.prep_dials_files,
                    into_workdir=params.copy_into_workdir)
    pm.write_merging_scripts(params.workdir, "par", params.prep_dials_files)
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    # Check parameters
    if params.program == "xscale":
        if (params.xscale.frames_per_batch, params.xscale.degrees_per_batch).count(None) == 0:
            print "ERROR! You can't specify both of xscale.frames_per_batch and xscale.degrees_per_batch"
            return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise RuntimeError("Unknown batch engine: %s" % params.batch.engine)  # was an invalid string exception

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)
    out.write("kamo.multi_merge started at %s\n\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    time_started = time.time()

    print >>out, "Parameters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try:
        html_report.add_params(params, master_params_str)
    except:
        print >>out, traceback.format_exc()

    xds_ascii_files = util.read_path_list(params.lstin, only_exists=True, as_abspath=True, err_out=out)

    if not xds_ascii_files:
        print >>out, "ERROR! Cannot find (existing) files in %s." % params.lstin
        return

    if len(xds_ascii_files) < 2:
        print >>out, "ERROR! Only one file in %s." % params.lstin
        print >>out, "       Give at least two files for merging."
        return

    cells = collections.OrderedDict()
    laues = {}  # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >>out, "Error in reading %s" % xac
            print >>out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue), {}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print >>out, "Laue symmetry", laue  # originally printed to stdout only
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >>out, "  %s" % f
                print >>out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >>out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (params.space_group, laues.keys()[0])
            return

        sg_refset = space_group.info().as_reference_setting().group()
        if space_group != sg_refset:
            print >>out, "Sorry! currently space group in non-reference setting is not supported."
            print >>out, "(You requested %s, which is different from reference setting: %s)" % (space_group.info(), sg_refset.info())
            return
    else:
        tmp = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
        print >>out, "Space group for merging:", tmp.info()

    test_flag_will_be_transferred = False
    if params.reference.data is not None:
        params.reference.data = os.path.abspath(params.reference.data)
        print >>out, "Reading reference data file: %s" % params.reference.data
        tmp = iotbx.file_reader.any_file(params.reference.data, force_type="hkl",
                                         raise_sorry_if_errors=True)
        if params.reference.copy_test_flag:
            from yamtbx.dataproc.command_line import copy_free_R_flag
            if None in copy_free_R_flag.get_flag_array(tmp.file_server.miller_arrays, log_out=out):
                print >>out, " Warning: no test flag found in reference file (%s)" % params.reference.data
            else:
                test_flag_will_be_transferred = True
                print >>out, " test flag will be transferred"

        if space_group is not None:
            if space_group != tmp.file_server.miller_arrays[0].space_group():
                print >>out, " ERROR! space_group=(%s) and that of reference.data (%s) do not match." % (space_group.info(), tmp.file_server.miller_arrays[0].space_group_info())
                return
        else:
            space_group = tmp.file_server.miller_arrays[0].space_group()
            print >>out, " space group for merging: %s" % space_group.info()

    if params.add_test_flag:
        if test_flag_will_be_transferred:
            print >>out, "Warning: add_test_flag=True was set, but the flag will be transferred from the reference file given."
        else:
            from cctbx import r_free_utils

            med_cell = numpy.median(cells.values(), axis=0)
            d_min = max(params.d_min-0.2, 1.0) if params.d_min is not None else 1.5  # to prevent infinite set
            sg = space_group
            if not sg:
                sg = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
            tmp = miller.build_set(crystal.symmetry(tuple(med_cell), space_group=sg),
                                   False, d_min=d_min, d_max=None)
            print >>out, "Generating test set using the reference symmetry:"
            crystal.symmetry.show_summary(tmp, out, " ")
            tmp = tmp.generate_r_free_flags(fraction=0.05, max_free=None,
                                            lattice_symmetry_max_delta=5.0,
                                            use_lattice_symmetry=True, n_shells=20)
            tmp.show_r_free_flags_info(out=out, prefix=" ")
            tmp = tmp.customized_copy(data=r_free_utils.export_r_free_flags_for_ccp4(flags=tmp.data(), test_flag_value=True))

            mtz_object = tmp.as_mtz_dataset(column_root_label="FreeR_flag").mtz_object()
            test_flag_mtz = os.path.abspath(os.path.join(params.workdir, "test_flag.mtz"))
            mtz_object.write(file_name=test_flag_mtz)

            # Override the parameters
            params.reference.copy_test_flag = True
            params.reference.data = test_flag_mtz

    try:
        html_report.add_cells_and_files(cells, laues.keys()[0])
    except:
        print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        else:
            print >>out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >>out, "Please change criteria of completeness or redundancy"
            print >>out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >>out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters:  # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV, clh))
        print >>out
        try:
            html_report.add_clutering_result(clusters, "blend")
        except:
            print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  cluster_method=params.cc_clustering.method,
                                  distance_eqn=params.cc_clustering.cc_to_distance,
                                  min_common_refs=params.cc_clustering.min_common_refs,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        else:
            print >>out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >>out, "Please change criteria of completeness or redundancy"
            print >>out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >>out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, ccmean, ccmin in clusters:  # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f CCmean=% .4f CCmin=% .4f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, ccmean, ccmin)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"), float("nan"), clh))
        print >>out
        try:
            html_report.add_clutering_result(clusters, "cc_clustering")
        except:
            print >>out, traceback.format_exc()

    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH  LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso.bst Aniso.wst dmin.est\n")

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %6.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %9.2f %9.2f %.2f\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0], stats["redundancy"][0],
                                  stats["i_over_sigma"][0], stats["r_meas"][0], stats["cc_half"][0],
                                  stats["cmpl"][2], stats["redundancy"][2],
                                  stats["i_over_sigma"][2], stats["r_meas"][2], stats["cc_half"][2],
                                  stats["cmpl"][1], stats["redundancy"][1],
                                  stats["i_over_sigma"][1], stats["r_meas"][1], stats["cc_half"][1],
                                  stats["sig_ano"][1], stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  #stats["xtriage_log"].anisotropy,
                                  stats["aniso"]["d_min_best"], stats["aniso"]["d_min_worst"],
                                  stats["dmin_est"],
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir):
                os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)

        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            # Last lcv & alcv
            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except:
                print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, space_group)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except:
                print >>out, traceback.format_exc()

    try:
        html_report.write_html()
    except:
        print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")

    out.write("\nNormal exit at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    out.write("Total wall-clock time: %.2f sec.\n" % (time.time()-time_started))

    return
    xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                      cbf_to_dat=params.cbf_to_dat,
                      use_tmpdir_if_available=params.use_tmpdir_if_available)

    if params.reference:
        print "Choosing reference data (reference=%s)" % params.reference
        ref_idx = xscale.decide_scaling_reference_based_on_bfactor(os.path.join(params.workdir, "XSCALE.LP"),
                                                                   params.reference, return_as="index")
        if ref_idx != 0:
            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(params.workdir, f))
            prep_xscale_inp(params.workdir, xscale_inp_head, xac_files, infos,
                            params.frames_per_batch, params.corrections, ref_idx=ref_idx)
            xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                              cbf_to_dat=params.cbf_to_dat,
                              use_tmpdir_if_available=params.use_tmpdir_if_available)
# run()

if __name__ == "__main__":
    cmdline = iotbx.phil.process_command_line(args=sys.argv[1:],
                                              master_string=master_params_str)
    params = cmdline.work.extract()
    xac_files = filter(check_valid_xac, cmdline.remaining_args)
    if params.lstin:
        xac_files.extend(filter(check_valid_xac, util.read_path_list(params.lstin)))

    print "XDS_ASCII.HKL files given:"
    for f in xac_files:
        print " %s" % f
    print

    run(params, xac_files)
cat(linea, file="./CLUSTERS.txt", append=TRUE) write.table(filenames[sorted_groups], sprintf("cluster%%.3d.lst",i), quote=FALSE, row.names=FALSE, col.names=FALSE) } q(save="yes") """ % (params.prefix, len(arrays), params.prefix, lstin) ) print "R --vanilla < %s_ana.R" % params.prefix # run() if __name__ == "__main__": import sys cmdline = iotbx.phil.process_command_line(args=sys.argv[1:], master_string=master_params_str) params = cmdline.work.extract() args = cmdline.remaining_args lstin = args[0] # run(lstin, params) from yamtbx.dataproc.auto.cc_clustering import CCClustering ccc = CCClustering( wdir=".", xac_files=read_path_list(lstin), d_min=params.d_min, d_max=params.d_max, min_ios=params.min_ios ) ccc.do_clustering(nproc=params.nproc)
def run(params):
    log_out = multi_out()
    log_out.register("log", open(params.logfile, "w"), atexit_send_to=None)
    log_out.register("stdout", sys.stdout)

    libtbx.phil.parse(master_params_str).format(params).show(out=log_out, prefix=" ")

    xac_files = read_path_list(params.lstin, only_exists=True, err_out=log_out)

    if len(xac_files) == 0:
        print >>log_out, "No (existing) files in the list: %s" % params.lstin
        return

    if params.method == "brehm_diederichs":
        rb = BrehmDiederichs(xac_files, max_delta=params.max_delta,
                             d_min=params.d_min, min_ios=params.min_ios,
                             nproc=params.nproc, log_out=log_out)
    elif params.method == "selective_breeding":
        rb = KabschSelectiveBreeding(xac_files, max_delta=params.max_delta,
                                     d_min=params.d_min, min_ios=params.min_ios,
                                     nproc=params.nproc, log_out=log_out)
    elif params.method == "reference":
        import iotbx.file_reader

        ref_file = iotbx.file_reader.any_file(params.reference_file)
        if ref_file.file_type == "hkl":
            ref_arrays = ref_file.file_server.miller_arrays
            if not ref_arrays:
                raise RuntimeError("No arrays in reference file")  # was an invalid string exception
            if params.reference_label is not None:
                ref_arrays = filter(lambda x: params.reference_label in x.info().labels, ref_arrays)
                if not ref_arrays:
                    raise RuntimeError("No arrays matched to specified label (%s)" % params.reference_label)
                ref_array = ref_arrays[0].as_intensity_array()
            else:
                ref_array = None
                for array in ref_arrays:
                    if array.is_xray_intensity_array():
                        ref_array = array
                        print >>log_out, "Using %s as reference data" % array.info().label_string()
                        break
                    elif array.is_xray_amplitude_array():
                        ref_array = array.f_as_f_sq()
                        print >>log_out, "Using %s as reference data" % array.info().label_string()
                        break
        elif ref_file.file_type == "pdb":
            import mmtbx.utils

            xrs = ref_file.file_content.xray_structure_simple()
            fmodel_params = mmtbx.command_line.fmodel.fmodel_from_xray_structure_master_params.extract()
            fmodel_params.fmodel.k_sol = 0.35
            fmodel_params.fmodel.b_sol = 50
            fmodel_params.high_resolution = params.d_min
            ref_array = mmtbx.utils.fmodel_from_xray_structure(xray_structure=xrs,
                                                               params=fmodel_params).f_model.as_intensity_array()
        else:
            raise RuntimeError("input file type invalid")

        if ref_array is None:
            raise RuntimeError("suitable reference data not found")

        rb = ReferenceBased(xac_files, ref_array, max_delta=params.max_delta,
                            d_min=params.d_min, min_ios=params.min_ios,
                            nproc=params.nproc, log_out=log_out)
    else:
        raise RuntimeError("Unknown method: %s" % params.method)

    if rb.bad_files:
        print "%s: %d bad files are included:" % ("WARNING" if params.skip_bad_files else "ERROR", len(rb.bad_files))
        for f in rb.bad_files:
            print "  %s" % f
        if not params.skip_bad_files:
            print
            print "You may want to change d_min= or min_ios= parameters to include these files."
            print "Alternatively, specify skip_bad_files=true to ignore these files (they are not included in output files)"
            return

    if params.method == "selective_breeding":
        rb.assign_operators(max_cycle=params.max_cycles)
    else:
        rb.assign_operators()

    rb.show_assign_summary()

    if params.dry_run:
        print >>log_out, "This is dry-run. Exiting here."
    else:
        out_prefix = os.path.splitext(os.path.basename(params.lstin))[0]

        ofs_cell = open(out_prefix + "_reindexed_cells.dat", "w")
        new_files = rb.modify_xds_ascii_files(cells_dat_out=ofs_cell)

        lstout = out_prefix + "_reindexed.lst"
        ofs = open(lstout, "w")
        ofs.write("\n".join(new_files) + "\n")
        ofs.close()
        print >>log_out, "Reindexing done. For merging, use %s instead!" % lstout

    if params.method == "brehm_diederichs":
        print >>log_out, """
CCTBX-implementation (by Richard Gildea) of the "algorithm 2" of the following paper was used.
For publication, please cite:
 Brehm, W. and Diederichs, K. Breaking the indexing ambiguity in serial crystallography.
 Acta Cryst. (2014). D70, 101-109
 http://dx.doi.org/10.1107/S1399004713025431"""
    elif params.method == "selective_breeding":
        print >>log_out, """
def run(params):
    log_out = multi_out()
    log_out.register("log", open(params.logfile, "w"), atexit_send_to=None)
    log_out.register("stdout", sys.stdout)

    libtbx.phil.parse(master_params_str).format(params).show(out=log_out, prefix=" ")

    xac_files = read_path_list(params.lstin, only_exists=True, err_out=log_out)

    if len(xac_files) == 0:
        print >>log_out, "No (existing) files in the list: %s" % params.lstin
        return

    if params.method == "selective_breeding":
        rb = KabschSelectiveBreeding(xac_files, max_delta=params.max_delta,
                                     d_min=params.d_min, min_ios=params.min_ios,
                                     nproc=params.nproc, log_out=log_out,
                                     from_p1=params.from_p1)
        xs = rb.representative_crystal_symmetry()

        log_out.write("Starting from:\n")
        xs.show_summary(log_out, "  ")
        log_out.write("\n")

        rb.assign_operators(max_cycle=params.max_cycles)
        rb.show_assign_summary()

        final_cc_means = rb.final_cc_means()
        assert len(final_cc_means) == len(xac_files)
        reidx_ops = rb.reindex_operators()

        sg = copy.copy(xs.space_group())
        unit_cell = xs.unit_cell()

        # Per-dataset CC means for the identity operator (h,k,l)
        cc0 = map(lambda x: x[0][1], final_cc_means)

        log_out.write("Analyzing KabschSelectiveBreeding result..\n")

        accepted_ops = []
        for iop in range(1, len(reidx_ops)):
            cci = map(lambda x: x[iop][1], final_cc_means)
            corr = numpy.corrcoef(cc0, cci)[0, 1]
            log_out.write("  h,k,l vs %s: corr= %.4f\n" % (reidx_ops[iop].as_hkl(), corr))
            if corr > 0.5:
                accepted_ops.append(reidx_ops[iop])
                sg.expand_smx(reidx_ops[iop].as_hkl())
                unit_cell = unit_cell.change_basis(reidx_ops[iop])
                log_out.write("    this operator accepted. sg= %s\n" % sg.info())

        log_out.write("Solution:\n")
        new_xs = crystal.symmetry(unit_cell, space_group=sg)
        new_xs.show_summary(log_out, "  ")
        log_out.write("As reference setting:\n")
        new_xs.as_reference_setting().show_summary(log_out, "  ")
        log_out.write("Initial:\n")
        xs.show_summary(log_out, "  ")

        log_out.write("""
* Notice *
Here the space group is deduced from the similarity of reflection intensities
under the constraint of lattice symmetry. This could be wrong especially when
the crystal is twinned. Please note that the space group is only truly
determined once the structure is solved.
""")
    else:
        raise RuntimeError("invalid method choice (method=%s)" % params.method)  # was an invalid string exception
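# Toy illustration (not from the original source) of the acceptance test in
# run() above, as I read it: a reindexing operator is accepted as a symmetry
# operator when the per-dataset CC profile under that operator tracks the
# profile under the identity operator (Pearson correlation > 0.5). All CC
# values below are made up.
import numpy

cc_identity = [0.92, 0.15, 0.88, 0.20]   # CC means per dataset for h,k,l
cc_candidate = [0.90, 0.18, 0.85, 0.22]  # CC means under a candidate operator
corr = numpy.corrcoef(cc_identity, cc_candidate)[0, 1]
print "corr=%.4f -> %s" % (corr, "accept" if corr > 0.5 else "reject")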