def run(params):
    if params.datout is None: params.datout = os.path.basename(params.lstin)+".dat"

    xac_files = read_path_list(params.lstin)
    ofs_dat = open(params.datout, "w")

    ref_v6cell = None
    if params.ref_cell is not None:
        ref_v6cell = v6cell(uctbx.unit_cell(params.ref_cell).niggli_cell())
        ofs_dat.write("# ref_cell= %s\n" % params.ref_cell)

    if params.n_residues is not None: ofs_dat.write("# n_residues= %d\n" % params.n_residues)

    ofs_dat.write("file ioversigma resnatsnr1 wilsonb abdist a b c al be ga\n")

    ret = easy_mp.pool_map(fixed_func=lambda x: calc_stats(x, params.stats, params.n_residues, ref_v6cell,
                                                           params.min_peak, params.min_peak_percentile,
                                                           params.correct_peak),
                           args=xac_files,
                           processes=params.nproc)
    
    for stat in ret:
        getornan = lambda x: stat.get(x, float("nan")) # get or nan
        ofs_dat.write("%s %.3f %.3f %.3f %.3e"%(stat["filename"],
                                                getornan("ioversigma"), getornan("resnatsnr1"),
                                                getornan("wilsonb"), getornan("abdist")))
        ofs_dat.write(" %.3f %.3f %.3f %.2f %.2f %.2f\n" % stat["cell"])

    ofs_dat.close()
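# Note: read_path_list() and v6cell() are used throughout these examples but
# are not defined in this listing (read_path_list presumably lives in
# yamtbx.util). Minimal sketches of what they likely do, for reference only --
# hypothetical reimplementations, not the yamtbx originals:

import os, math

def read_path_list(lstin, only_exists=False, as_abspath=False, err_out=None):
    # One path per line; blank lines are skipped, missing files optionally dropped.
    ret = []
    for line in open(lstin):
        line = line.strip()
        if not line: continue
        if only_exists and not os.path.isfile(line):
            if err_out: err_out.write("Not found: %s\n" % line)
            continue
        ret.append(os.path.abspath(line) if as_abspath else line)
    return ret

def v6cell(cell):
    # Presumably the G6 vector of a Niggli-reduced cell, used for the
    # NCDist-style "abdist" cell comparison:
    # (a^2, b^2, c^2, 2bc*cos(alpha), 2ac*cos(beta), 2ab*cos(gamma))
    a, b, c, al, be, ga = cell.parameters()
    rad = math.radians
    return (a * a, b * b, c * c,
            2 * b * c * math.cos(rad(al)),
            2 * a * c * math.cos(rad(be)),
            2 * a * b * math.cos(rad(ga)))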
def read_dials_phil(phil_in, dials_phil_selection_lst):
    master_str = """\
input {
  experiments = None
   .type = path
   .multiple = true
  reflections = None
   .type = path
   .multiple = true
}
"""
    params = iotbx.phil.process_command_line(
        args=[phil_in], master_string=master_str).work.extract()

    assert len(params.input.experiments) == len(params.input.reflections)
    assert all(map(lambda x: os.path.isfile(x), params.input.experiments))
    assert all(map(lambda x: os.path.isfile(x), params.input.reflections))

    ret = zip(params.input.experiments, params.input.reflections)

    if dials_phil_selection_lst:
        selected_files = set(read_path_list(dials_phil_selection_lst))
        ret = filter(lambda x: x[1] in selected_files, ret)

    return ret
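# Hypothetical usage of read_dials_phil() above; the file names are
# placeholders, not files from the original project:
#
#   for exp, refl in read_dials_phil("input_files.phil", "selected.lst"):
#       print exp, refl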
Example #3
def run(opts, files):
    if len(files) == 1 and files[0].endswith(".lst"):
        files = read_path_list(files[0])

    make_geom(files[0], os.path.basename(files[0]) + ".geom")

    easy_mp.pool_map(fixed_func=run_each, args=files, processes=opts.nproc)
Example #5
def run(params):
    log_out = multi_out()
    log_out.register("log", open(params.logfile, "w"), atexit_send_to=None)
    log_out.register("stdout", sys.stdout)

    libtbx.phil.parse(master_params_str).format(params).show(out=log_out, prefix=" ")

    xac_files = read_path_list(params.lstin, only_exists=True, err_out=log_out)

    if len(xac_files) == 0:
        print >>log_out, "No (existing) files in the list: %s" % params.lstin
        return
    
    if params.method == "brehm_diederichs":
        rb = BrehmDiederichs(xac_files, max_delta=params.max_delta,
                             d_min=params.d_min, min_ios=params.min_ios,
                             nproc=params.nproc, log_out=log_out)
    elif params.method == "selective_breeding":
        rb = KabschSelectiveBreeding(xac_files, max_delta=params.max_delta,
                                     d_min=params.d_min, min_ios=params.min_ios,
                                     nproc=params.nproc, log_out=log_out)
    elif params.method == "reference":
        rb = ReferenceBased(xac_files, params.reference_file, max_delta=params.max_delta,
                            d_min=params.d_min, min_ios=params.min_ios,
                            nproc=params.nproc, log_out=log_out)
    else:
        raise "Unknown method: %s" % params.method

    rb.assign_operators()
    new_files = rb.modify_xds_ascii_files()

    lstout = os.path.splitext(os.path.basename(params.lstin))[0]+"_reindexed.lst"
    ofs = open(lstout, "w")
    ofs.write("\n".join(new_files)+"\n")
    ofs.close()
    print >>log_out, "Reindexing done. For merging, use %s instead!" % lstout

    if params.method == "brehm_diederichs":
        print >>log_out, """
CCTBX-implementation (by Richard Gildea) of the "algorithm 2" of the following paper was used.
For publication, please cite:
 Brehm, W. and Diederichs, K. Breaking the indexing ambiguity in serial crystallography.
 Acta Cryst. (2014). D70, 101-109
 http://dx.doi.org/10.1107/S1399004713025431"""
    elif params.method == "selective_breeding":
        print >>log_out, """
Example #7
def run(params, target_files):
    assert params.normalization in ("no", "E")
    ofs = open(params.dat_out, "w")

    xac_files = util.read_path_list(params.lstin)
    targets = read_target_files(target_files, params.d_min, params.d_max,
                                params.normalization, ofs)

    cellcon = CellConstraints(targets.values()[0].space_group())

    #for i, t in enumerate(targets): ofs.write("# target%.3d = %s\n" % (i,t))
    ofs.write("# normalization = %s\n" % params.normalization)
    ofs.write("# d_min, d_max = %s, %s\n" % (params.d_min, params.d_max))
    ofs.write("file %s " % cellcon.get_label_for_free_params())
    ofs.write(" ".join(
        map(lambda x: "cc.%.3d nref.%.3d" % (x, x), xrange(len(targets)))))
    ofs.write("\n")

    for xac_file in xac_files:
        print "reading", xac_file
        xac = xds_ascii.XDS_ASCII(xac_file)
        xac.remove_rejected()
        iobs = xac.i_obs(anomalous_flag=False).merge_equivalents(
            use_internal_variance=False).array()
        ofs.write("%s %s" %
                  (xac_file, cellcon.format_free_params(iobs.unit_cell())))
        fail_flag = False
        if params.normalization == "E":
            try:
                normaliser = kernel_normalisation(iobs, auto_kernel=True)
                iobs = iobs.customized_copy(
                    data=iobs.data() / normaliser.normalizer_for_miller_array,
                    sigmas=iobs.sigmas() /
                    normaliser.normalizer_for_miller_array)
            except:
                fail_flag = True

        for i, ta in enumerate(targets.values()):
            if fail_flag:
                ofs.write(" % .4f %4d" % cc_num)
            else:
                cc_num = calc_cc(iobs, ta)
                ofs.write(" % .4f %4d" % cc_num)

        ofs.write("\n")
Example #8
def run(params, args):
    ref_xs = None

    if None not in (params.ref_cell, params.ref_symm):
        ref_xs = crystal.symmetry(params.ref_cell, params.ref_symm)

    if len(args) == 1 and args[0].endswith(".lst"):
        args = read_path_list(args[0])

    if len(args) == 0: return

    angles = []

    for arg in args:
        if xds_ascii.is_xds_ascii(arg):
            angles.extend(from_xds_ascii(arg, ref_xs))
        else:
            angles.extend(from_xparm(arg, ref_xs))

    if params.dat_out: make_dat(angles, params.dat_out)
    if params.plot_out: make_plot(angles, params.plot_out)
Example #9
def run(params):
    ref_xs = None

    if None not in (params.ref_cell, params.ref_symm):
        ref_xs = crystal.symmetry(params.ref_cell, params.ref_symm)

    if len(params.input) == 1 and params.input[0].endswith(".lst"):
        params.input = read_path_list(params.input[0])

    if len(params.input) == 0: return

    angles = []

    for arg in params.input:
        if ".stream" in arg:
            angles.extend(from_crystfel_stream(arg, ref_xs))
        elif xds_ascii.is_xds_ascii(arg):
            angles.extend(from_xds_ascii(arg, ref_xs))
        else:
            angles.extend(from_xparm(arg, ref_xs))

    if params.dat_out: make_dat(angles, params.dat_out)
    if params.plot_out: make_plot(angles, params.plot_out)
Example #10
def run(lstin, params):
    xac_files = read_path_list(lstin)

    common0 = len(os.path.commonprefix(xac_files))

    arrays = []

    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=params.d_min, d_max=params.d_max)
        a = a.merge_equivalents(use_internal_variance=False).array()
        a = a.select(a.data() / a.sigmas() >= params.min_ios)
        arrays.append(a)

    # Prep
    args = []
    for i in xrange(len(arrays) - 1):
        for j in xrange(i + 1, len(arrays)):
            args.append((i, j))

    # Calc all CC
    worker = lambda x: calc_cc(arrays[x[0]], arrays[x[1]])
    results = easy_mp.pool_map(fixed_func=worker, args=args, processes=params.nproc)

    # Make matrix
    mat = numpy.zeros(shape=(len(arrays), len(arrays)))
    for (i, j), (cc, nref) in zip(args, results):
        print j, i, cc
        mat[j, i] = cc

    open("%s.names" % params.prefix, "w").write("\n".join(map(lambda x: os.path.dirname(x[common0:]), xac_files)))
    open("%s.matrix" % params.prefix, "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

    ofs = open("%s.dat" % params.prefix, "w")
    ofs.write("i j cc nref\n")
    for (i, j), (cc, nref) in zip(args, results):
        ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

    open("%s_ana.R" % params.prefix, "w").write(
        """\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%s.matrix")
md<-matrix(1-cc, ncol=%d, byrow=TRUE)
labs<-read.table("%s.names")
filenames<-read.table("%s")$V1
rownames(md)<-labs$V1
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()

hc$labels <- 1:nrow(md)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- paste(sprintf("     %%03d           %%3d         %%7.3f\\n",
                i,length(groups[[i]]),hc$height[i]),sep="")
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
 write.table(filenames[sorted_groups], sprintf("cluster%%.3d.lst",i), quote=FALSE, row.names=FALSE, col.names=FALSE)
}

q(save="yes")
"""
        % (params.prefix, len(arrays), params.prefix, lstin)
    )
    print "R --vanilla < %s_ana.R" % params.prefix
Example #11
def run(params):
    if not params.workdir:
        print "Give workdir="
        return
    if os.path.exists(params.workdir):
        print "workdir already exists:", params.workdir
        return

    params.workdir = os.path.abspath(params.workdir)

    if None not in (params.unit_cell, params.space_group):
        user_xs = crystal.symmetry(params.unit_cell, params.space_group)
    else:
        user_xs = None

    from yamtbx.dataproc.auto.command_line.multi_check_cell_consistency import CellGraph

    cm = CellGraph(tol_length=params.cell_grouping.tol_length,
                   tol_angle=params.cell_grouping.tol_angle)

    if len(params.xdsdir) == 1 and os.path.isfile(params.xdsdir[0]):
        params.xdsdir = util.read_path_list(params.xdsdir[0])

    xds_dirs = []
    for xd in params.xdsdir:
        xds_dirs.extend(
            map(
                lambda x: x[0],
                filter(
                    lambda x: any(
                        map(lambda y: y.startswith("XDS_ASCII.HKL"), x[2])) or
                    "DIALS.HKL" in x[2], os.walk(os.path.abspath(xd)))))

    for i, xd in enumerate(xds_dirs):
        cm.add_proc_result(i, xd)

    pm = PrepMerging(cm)
    print pm.find_groups()

    if len(cm.groups) == 0:
        print "Oh, no. No data."
        return

    if params.group_choice is None:
        while True:
            try:
                val = int(
                    raw_input("Input group number [%d..%d]: " %
                              (1, len(cm.groups))))
                if not 0 < val <= len(cm.groups): raise ValueError
                params.group_choice = val
                break
            except ValueError:
                continue

    symms = cm.get_selectable_symms(params.group_choice - 1)
    symmidx = -1

    if user_xs:
        #for xs in cm.get_selectable_symms(params.group_choice):
        raise "Not supported now."

    while True:
        try:
            val = int(
                raw_input("Input symmetry number [%d..%d]: " %
                          (0, len(symms) - 1)))
            if not 0 <= val < len(symms): raise ValueError
            symmidx = val
            break
        except ValueError:
            continue

    os.mkdir(params.workdir)

    topdir = os.path.dirname(os.path.commonprefix(xds_dirs))

    pm.prep_merging(group=params.group_choice,
                    symmidx=symmidx,
                    workdir=params.workdir,
                    topdir=topdir,
                    cell_method=params.cell_method,
                    nproc=params.nproc,
                    prep_dials_files=params.prep_dials_files,
                    into_workdir=params.copy_into_workdir)
    pm.write_merging_scripts(params.workdir, "par", params.prep_dials_files)
Example #12
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    # Check parameters
    if params.program == "xscale":
        if (params.xscale.frames_per_batch,
                params.xscale.degrees_per_batch).count(None) == 0:
            print "ERROR! You can't specify both of xscale.frames_per_batch and xscale.degrees_per_batch"
            return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log",
                 open(os.path.join(params.workdir, "multi_merge.log"), "w"),
                 atexit_send_to=None)
    out.register("stdout", sys.stdout)
    out.write("kamo.multi_merge started at %s\n\n" %
              time.strftime("%Y-%m-%d %H:%M:%S"))
    time_started = time.time()

    print >> out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out,
                                                             prefix=" ")
    print >> out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(
        os.path.abspath(params.workdir))
    try:
        html_report.add_params(params, master_params_str)
    except:
        print >> out, traceback.format_exc()

    xds_ascii_files = util.read_path_list(params.lstin,
                                          only_exists=True,
                                          as_abspath=True,
                                          err_out=out)

    if not xds_ascii_files:
        print >> out, "ERROR! Cannot find (existing) files in %s." % params.lstin
        return

    if len(xds_ascii_files) < 2:
        print >> out, "ERROR! Only one file in %s." % params.lstin
        print >> out, "       Give at least two files for merging."
        return

    cells = collections.OrderedDict()
    laues = {}  # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >> out, "Error in reading %s" % xac
            print >> out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(
            False).info()
        laues.setdefault(str(laue), {}).setdefault(
            symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >> out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >> out, " SPACE_GROUP_NUMBER= %d (%d data)" % (
                    sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >> out, "  %s" % f
                print >> out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(
            space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >> out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (
                params.space_group, laues.keys()[0])
            return

        sg_refset = space_group.info().as_reference_setting().group()
        if space_group != sg_refset:
            print >> out, "Sorry! currently space group in non-reference setting is not supported."
            print >> out, "(You requested %s, which is different from reference setting: %s)" % (
                space_group.info(), sg_refset.info())
            return
    else:
        tmp = sgtbx.space_group_info(
            laues.values()[0].keys()
            [0]).group().build_derived_reflection_intensity_group(True)
        print >> out, "Space group for merging:", tmp.info()

    test_flag_will_be_transferred = False

    if params.reference.data is not None:
        params.reference.data = os.path.abspath(params.reference.data)
        print >> out, "Reading reference data file: %s" % params.reference.data

        tmp = iotbx.file_reader.any_file(params.reference.data,
                                         force_type="hkl",
                                         raise_sorry_if_errors=True)
        if params.reference.copy_test_flag:
            from yamtbx.dataproc.command_line import copy_free_R_flag
            if None in copy_free_R_flag.get_flag_array(
                    tmp.file_server.miller_arrays, log_out=out):
                print >> out, " Warning: no test flag found in reference file (%s)" % params.reference.data
            else:
                test_flag_will_be_transferred = True
                print >> out, " test flag will be transferred"

        if space_group is not None:
            if space_group != tmp.file_server.miller_arrays[0].space_group():
                print >> out, " ERROR! space_group=(%s) and that of reference.data (%s) do not match." % (
                    space_group.info(),
                    tmp.file_server.miller_arrays[0].space_group_info())
                return
        else:
            space_group = tmp.file_server.miller_arrays[0].space_group()
            print >> out, " space group for merging: %s" % space_group.info()

    if params.add_test_flag:
        if test_flag_will_be_transferred:
            print >> out, "Warning: add_test_flag=True was set, but the flag will be transferred from the reference file given."
        else:
            from cctbx import r_free_utils

            med_cell = numpy.median(cells.values(), axis=0)
            d_min = max(
                params.d_min - 0.2, 1.0
            ) if params.d_min is not None else 1.5  # to prevent infinite set
            sg = space_group
            if not sg:
                sg = sgtbx.space_group_info(
                    laues.values()[0].keys()
                    [0]).group().build_derived_reflection_intensity_group(True)
            tmp = miller.build_set(crystal.symmetry(tuple(med_cell),
                                                    space_group=sg),
                                   False,
                                   d_min=d_min,
                                   d_max=None)
            print >> out, "Generating test set using the reference symmetry:"
            crystal.symmetry.show_summary(tmp, out, " ")
            tmp = tmp.generate_r_free_flags(fraction=0.05,
                                            max_free=None,
                                            lattice_symmetry_max_delta=5.0,
                                            use_lattice_symmetry=True,
                                            n_shells=20)
            tmp.show_r_free_flags_info(out=out, prefix=" ")
            tmp = tmp.customized_copy(
                data=r_free_utils.export_r_free_flags_for_ccp4(
                    flags=tmp.data(), test_flag_value=True))

            mtz_object = tmp.as_mtz_dataset(
                column_root_label="FreeR_flag").mtz_object()
            test_flag_mtz = os.path.abspath(
                os.path.join(params.workdir, "test_flag.mtz"))
            mtz_object.write(file_name=test_flag_mtz)

            # Override the parameters
            params.reference.copy_test_flag = True
            params.reference.data = test_flag_mtz

    try:
        html_report.add_cells_and_files(cells, laues.keys()[0])
    except:
        print >> out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >> out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >> out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir,
                                             d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(
            out=open(summary_out, "w"))
        print >> out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl,
                              clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl,
                              clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun,
                              clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun,
                              clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV,
                              clusters)

        if params.max_clusters is not None and len(
                clusters) > params.max_clusters:
            print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (
                params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >> out, "With specified conditions, following %d clusters will be merged:" % len(
                clusters)
        else:
            print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >> out, "Please change criteria of completeness or redundancy"
            print >> out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >> out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters:  # process largest first
            print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (
                clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir,
                                                "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x - 1],
                                       IDs), LCV, aLCV, clh))
        print >> out
        try:
            html_report.add_clutering_result(clusters, "blend")
        except:
            print >> out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(
            ccc_wdir,
            xds_ascii_files,
            d_min=params.cc_clustering.d_min
            if params.cc_clustering.d_min is not None else params.d_min,
            min_ios=params.cc_clustering.min_ios)
        print >> out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(
            nproc=params.cc_clustering.nproc,
            b_scale=params.cc_clustering.b_scale,
            use_normalized=params.cc_clustering.use_normalized,
            cluster_method=params.cc_clustering.method,
            distance_eqn=params.cc_clustering.cc_to_distance,
            min_common_refs=params.cc_clustering.min_common_refs,
            html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min,
                                                    out=open(summary_out, "w"))
        print >> out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl,
                              clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl,
                              clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun,
                              clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(
                lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(
                lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(
                clusters) > params.max_clusters:
            print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (
                params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >> out, "With specified conditions, following %d clusters will be merged:" % len(
                clusters)
        else:
            print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >> out, "Please change criteria of completeness or redundancy"
            print >> out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >> out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, ccmean, ccmin in clusters:  # process largest first
            print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f CCmean=% .4f CCmin=% .4f" % (
                clno, len(IDs), clh, cmpl, redun, acmpl, aredun, ccmean, ccmin)
            data_for_merge.append((os.path.join(params.workdir,
                                                "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x - 1],
                                       IDs), float("nan"), float("nan"), clh))
        print >> out

        try:
            html_report.add_clutering_result(clusters, "cc_clustering")
        except:
            print >> out, traceback.format_exc()

    else:
        data_for_merge.append((os.path.join(params.workdir,
                                            "all_data"), xds_ascii_files,
                               float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"),
                       "w")
    ofs_summary.write(
        "# d_min= %.3f A\n" %
        (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write(
        "     cluster    ClH  LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso.bst Aniso.wst dmin.est\n"
    )

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files,
                          stats):
        tmps = "%12s %6.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %9.2f %9.2f %.2f\n"
        ofs_summary.write(tmps % (
            os.path.relpath(workdir, params.workdir),
            clh,
            LCV,
            aLCV,
            cycle,
            len(xds_files),
            num_files,
            stats["cmpl"][0],
            stats["redundancy"][0],
            stats["i_over_sigma"][0],
            stats["r_meas"][0],
            stats["cc_half"][0],
            stats["cmpl"][2],
            stats["redundancy"][2],
            stats["i_over_sigma"][2],
            stats["r_meas"][2],
            stats["cc_half"][2],
            stats["cmpl"][1],
            stats["redundancy"][1],
            stats["i_over_sigma"][1],
            stats["r_meas"][1],
            stats["cc_half"][1],
            stats["sig_ano"][1],
            stats["cc_ano"][1],
            stats["xtriage_log"].wilson_b,
            #stats["xtriage_log"].anisotropy,
            stats["aniso"]["d_min_best"],
            stats["aniso"]["d_min_worst"],
            stats["dmin_est"],
        ))
        ofs_summary.flush()

    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells,
                         space_group),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir,
                                                        "result.pkl")))
            except:
                print >> out, "Error in unpickling result in %s" % workdir
                print >> out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" %
                                  os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files,
                                  num_files, stats)

            # Last lcv & alcv
            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv,
                                             xds_files, results[-1][2],
                                             results[-1][3])
            except:
                print >> out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >> out, "Merging %s..." % os.path.relpath(
                workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells,
                                     space_group)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" %
                                  os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files,
                                  num_files, stats)

            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv,
                                             xds_files, results[-1][2],
                                             results[-1][3])
            except:
                print >> out, traceback.format_exc()

    try:
        html_report.write_html()
    except:
        print >> out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")

    out.write("\nNormal exit at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    out.write("Total wall-clock time: %.2f sec.\n" %
              (time.time() - time_started))

    return
Example #13
    xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                      cbf_to_dat=params.cbf_to_dat,
                      use_tmpdir_if_available=params.use_tmpdir_if_available)

    if params.reference:
        print "Choosing reference data (reference=%s)" % params.reference
        ref_idx = xscale.decide_scaling_reference_based_on_bfactor(os.path.join(params.workdir, "XSCALE.LP"), params.reference, return_as="index")
        if ref_idx != 0:
            for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(params.workdir, f))
            prep_xscale_inp(params.workdir, xscale_inp_head, xac_files, infos, params.frames_per_batch, params.corrections, ref_idx=ref_idx)
            xscale.run_xscale(os.path.join(params.workdir, "XSCALE.INP"),
                              cbf_to_dat=params.cbf_to_dat,
                              use_tmpdir_if_available=params.use_tmpdir_if_available)

# run()

if __name__ == "__main__":
    cmdline = iotbx.phil.process_command_line(args=sys.argv[1:],
                                              master_string=master_params_str)
    params = cmdline.work.extract()
    xac_files = filter(check_valid_xac, cmdline.remaining_args)
    if params.lstin:
        xac_files.extend(filter(check_valid_xac, util.read_path_list(params.lstin)))

    print "XDS_ASCII.HKL files given:"
    for f in xac_files:
        print " %s" % f
    print

    run(params, xac_files)
Example #15
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
 write.table(filenames[sorted_groups], sprintf("cluster%%.3d.lst",i), quote=FALSE, row.names=FALSE, col.names=FALSE)
}

q(save="yes")
"""
        % (params.prefix, len(arrays), params.prefix, lstin)
    )
    print "R --vanilla < %s_ana.R" % params.prefix


# run()


if __name__ == "__main__":
    import sys

    cmdline = iotbx.phil.process_command_line(args=sys.argv[1:], master_string=master_params_str)
    params = cmdline.work.extract()
    args = cmdline.remaining_args

    lstin = args[0]
    # run(lstin, params)
    from yamtbx.dataproc.auto.cc_clustering import CCClustering

    ccc = CCClustering(
        wdir=".", xac_files=read_path_list(lstin), d_min=params.d_min, d_max=params.d_max, min_ios=params.min_ios
    )

    ccc.do_clustering(nproc=params.nproc)
def run(params):
    log_out = multi_out()
    log_out.register("log", open(params.logfile, "w"), atexit_send_to=None)
    log_out.register("stdout", sys.stdout)

    libtbx.phil.parse(master_params_str).format(params).show(out=log_out,
                                                             prefix=" ")

    xac_files = read_path_list(params.lstin, only_exists=True, err_out=log_out)

    if len(xac_files) == 0:
        print >> log_out, "No (existing) files in the list: %s" % params.lstin
        return

    if params.method == "brehm_diederichs":
        rb = BrehmDiederichs(xac_files,
                             max_delta=params.max_delta,
                             d_min=params.d_min,
                             min_ios=params.min_ios,
                             nproc=params.nproc,
                             log_out=log_out)
    elif params.method == "selective_breeding":
        rb = KabschSelectiveBreeding(xac_files,
                                     max_delta=params.max_delta,
                                     d_min=params.d_min,
                                     min_ios=params.min_ios,
                                     nproc=params.nproc,
                                     log_out=log_out)
    elif params.method == "reference":
        import iotbx.file_reader

        ref_file = iotbx.file_reader.any_file(params.reference_file)
        if ref_file.file_type == "hkl":
            ref_arrays = ref_file.file_server.miller_arrays
            if not ref_arrays:
                raise "No arrays in reference file"
            if params.reference_label is not None:
                ref_arrays = filter(
                    lambda x: params.reference_label in x.info().labels,
                    ref_arrays)
                if not ref_arrays:
                    raise "No arrays matched to specified label (%s)" % params.reference_label
                ref_array = ref_arrays[0].as_intensity_array()
            else:
                ref_array = None
                for array in ref_arrays:
                    if array.is_xray_intensity_array():
                        ref_array = array
                        print >> log_out, "Using %s as reference data" % array.info(
                        ).label_string()
                        break
                    elif array.is_xray_amplitude_array():
                        ref_array = array.f_as_f_sq()
                        print >> log_out, "Using %s as reference data" % array.info(
                        ).label_string()
                        break
        elif ref_file.file_type == "pdb":
            import mmtbx.utils
            import mmtbx.command_line.fmodel
            xrs = ref_file.file_content.xray_structure_simple()
            fmodel_params = mmtbx.command_line.fmodel.fmodel_from_xray_structure_master_params.extract()
            fmodel_params.fmodel.k_sol = 0.35
            fmodel_params.fmodel.b_sol = 50
            fmodel_params.high_resolution = params.d_min
            ref_array = mmtbx.utils.fmodel_from_xray_structure(
                xray_structure=xrs,
                params=fmodel_params).f_model.as_intensity_array()
        else:
            raise "input file type invalid"

        if ref_array is None:
            raise "suitable reference data not found"

        rb = ReferenceBased(xac_files,
                            ref_array,
                            max_delta=params.max_delta,
                            d_min=params.d_min,
                            min_ios=params.min_ios,
                            nproc=params.nproc,
                            log_out=log_out)
    else:
        raise "Unknown method: %s" % params.method

    if rb.bad_files:
        print "%s: %d bad files are included:" % (
            "WARNING" if params.skip_bad_files else "ERROR", len(rb.bad_files))
        for f in rb.bad_files:
            print "  %s" % f
        if not params.skip_bad_files:
            print
            print "You may want to change d_min= or min_ios= parameters to include these files."
            print "Alternatively, specify skip_bad_files=true to ignore these files (they are not included in output files)"
            return

    if params.method == "selective_breeding":
        rb.assign_operators(max_cycle=params.max_cycles)
    else:
        rb.assign_operators()

    rb.show_assign_summary()

    if params.dry_run:
        print >> log_out, "This is dry-run. Exiting here."
    else:
        out_prefix = os.path.splitext(os.path.basename(params.lstin))[0]

        ofs_cell = open(out_prefix + "_reindexed_cells.dat", "w")
        new_files = rb.modify_xds_ascii_files(cells_dat_out=ofs_cell)

        lstout = out_prefix + "_reindexed.lst"
        ofs = open(lstout, "w")
        ofs.write("\n".join(new_files) + "\n")
        ofs.close()
        print >> log_out, "Reindexing done. For merging, use %s instead!" % lstout

    if params.method == "brehm_diederichs":
        print >> log_out, """
CCTBX-implementation (by Richard Gildea) of the "algorithm 2" of the following paper was used.
For publication, please cite:
 Brehm, W. and Diederichs, K. Breaking the indexing ambiguity in serial crystallography.
 Acta Cryst. (2014). D70, 101-109
 http://dx.doi.org/10.1107/S1399004713025431"""
    elif params.method == "selective_breeding":
        print >> log_out, """
def run(params):
    log_out = multi_out()
    log_out.register("log", open(params.logfile, "w"), atexit_send_to=None)
    log_out.register("stdout", sys.stdout)

    libtbx.phil.parse(master_params_str).format(params).show(out=log_out, prefix=" ")

    xac_files = read_path_list(params.lstin, only_exists=True, err_out=log_out)

    if len(xac_files) == 0:
        print >>log_out, "No (existing) files in the list: %s" % params.lstin
        return
    
    if params.method == "brehm_diederichs":
        rb = BrehmDiederichs(xac_files, max_delta=params.max_delta,
                             d_min=params.d_min, min_ios=params.min_ios,
                             nproc=params.nproc, log_out=log_out)
    elif params.method == "selective_breeding":
        rb = KabschSelectiveBreeding(xac_files, max_delta=params.max_delta,
                                     d_min=params.d_min, min_ios=params.min_ios,
                                     nproc=params.nproc, log_out=log_out)
    elif params.method == "reference":
        import iotbx.file_reader

        ref_file = iotbx.file_reader.any_file(params.reference_file)
        if ref_file.file_type == "hkl":
            ref_arrays = ref_file.file_server.miller_arrays
            if not ref_arrays:
                raise "No arrays in reference file"
            if params.reference_label is not None:
                ref_arrays = filter(lambda x: params.reference_label in x.info().labels, ref_arrays)
                if not ref_arrays: raise ValueError("No arrays matched to specified label (%s)" % params.reference_label)
                ref_array = ref_arrays[0].as_intensity_array()
            else:
                ref_array = None
                for array in ref_arrays:
                    if array.is_xray_intensity_array():
                        ref_array = array
                        print >>log_out, "Using %s as reference data" % array.info().label_string()
                        break
                    elif array.is_xray_amplitude_array():
                        ref_array = array.f_as_f_sq()
                        print >>log_out, "Using %s as reference data" % array.info().label_string()
                        break
        elif ref_file.file_type == "pdb":
            import mmtbx.utils
            import mmtbx.command_line.fmodel
            xrs = ref_file.file_content.xray_structure_simple()
            fmodel_params = mmtbx.command_line.fmodel.fmodel_from_xray_structure_master_params.extract()
            fmodel_params.fmodel.k_sol = 0.35
            fmodel_params.fmodel.b_sol = 50
            fmodel_params.high_resolution = params.d_min
            ref_array = mmtbx.utils.fmodel_from_xray_structure(xray_structure=xrs, params=fmodel_params).f_model.as_intensity_array()
        else:
            raise "input file type invalid"

        if ref_array is None:
            raise "suitable reference data not found"

        rb = ReferenceBased(xac_files, ref_array, max_delta=params.max_delta,
                            d_min=params.d_min, min_ios=params.min_ios,
                            nproc=params.nproc, log_out=log_out)
    else:
        raise "Unknown method: %s" % params.method

    if params.method == "selective_breeding":
        rb.assign_operators(max_cycle=params.max_cycles)
    else:
        rb.assign_operators()

    out_prefix = os.path.splitext(os.path.basename(params.lstin))[0]

    ofs_cell = open(out_prefix+"_reindexed_cells.dat", "w")
    new_files = rb.modify_xds_ascii_files(cells_dat_out=ofs_cell)

    lstout = out_prefix + "_reindexed.lst"
    ofs = open(lstout, "w")
    ofs.write("\n".join(new_files)+"\n")
    ofs.close()
    print >>log_out, "Reindexing done. For merging, use %s instead!" % lstout

    if params.method == "brehm_diederichs":
        print >>log_out, """
CCTBX-implementation (by Richard Gildea) of the "algorithm 2" of the following paper was used.
For publication, please cite:
 Brehm, W. and Diederichs, K. Breaking the indexing ambiguity in serial crystallography.
 Acta Cryst. (2014). D70, 101-109
 http://dx.doi.org/10.1107/S1399004713025431"""
    elif params.method == "selective_breeding":
        print >>log_out, """
def run(params):
    log_out = multi_out()
    log_out.register("log", open(params.logfile, "w"), atexit_send_to=None)
    log_out.register("stdout", sys.stdout)

    libtbx.phil.parse(master_params_str).format(params).show(out=log_out,
                                                             prefix=" ")

    xac_files = read_path_list(params.lstin, only_exists=True, err_out=log_out)
    if len(xac_files) == 0:
        print >> log_out, "No (existing) files in the list: %s" % params.lstin
        return

    if params.method == "selective_breeding":
        rb = KabschSelectiveBreeding(xac_files,
                                     max_delta=params.max_delta,
                                     d_min=params.d_min,
                                     min_ios=params.min_ios,
                                     nproc=params.nproc,
                                     log_out=log_out,
                                     from_p1=params.from_p1)
        xs = rb.representative_crystal_symmetry()

        log_out.write("Starting from:\n")
        xs.show_summary(log_out, "  ")
        log_out.write("\n")

        rb.assign_operators(max_cycle=params.max_cycles)
        rb.show_assign_summary()
        final_cc_means = rb.final_cc_means()
        assert len(final_cc_means) == len(xac_files)
        reidx_ops = rb.reindex_operators()
        sg = copy.copy(xs.space_group())
        unit_cell = xs.unit_cell()

        cc0 = map(lambda x: x[0][1], final_cc_means)
        log_out.write("Analyzing KabschSelectiveBreeding result..\n")

        accepted_ops = []

        for iop in range(1, len(reidx_ops)):
            cci = map(lambda x: x[iop][1], final_cc_means)
            corr = numpy.corrcoef(cc0, cci)[0, 1]
            log_out.write("  h,k,l vs %s: corr= %.4f\n" %
                          (reidx_ops[iop].as_hkl(), corr))
            if corr > 0.5:
                accepted_ops.append(reidx_ops[iop])
                sg.expand_smx(reidx_ops[iop].as_hkl())
                unit_cell = unit_cell.change_basis(reidx_ops[iop])
                log_out.write("    this operator accepted. sg= %s\n" %
                              sg.info())

        log_out.write("Solution:\n")
        new_xs = crystal.symmetry(unit_cell, space_group=sg)
        new_xs.show_summary(log_out, "  ")
        log_out.write("As reference setting:\n")
        new_xs.as_reference_setting().show_summary(log_out, "  ")

        log_out.write("Initial:\n")
        xs.show_summary(log_out, "  ")

        log_out.write("""
* Notice *
Here the space group is deduced from the similarity of reflection intensities under the constraint of lattice symmetry.
This could be wrong, especially when the crystal is twinned.
Please note that the space group is only truly determined once the structure is solved.
""")

    else:
        raise "invalid method choice (method=%s)" % params.method