コード例 #1
0
def _read_merging_stats(xds_file):
    """Read resolution and completeness from the log next to *xds_file*.

    Looks for ``merging_stats.log`` in the directory of *xds_file* and takes
    the last whitespace-separated token of the first line starting with
    ``Resolution:`` and of the first line starting with ``Completeness:``
    (a ``%`` in the latter is removed before conversion).

    Returns:
        ``(resn, cmpl)`` as floats, or ``(nan, nan)`` when the file cannot be
        read or either value cannot be parsed.
    """
    merge_log = os.path.join(os.path.dirname(xds_file), "merging_stats.log")
    try:
        with open(merge_log) as fh:
            lines = fh.readlines()
        resn_line = [line for line in lines
                     if line.startswith("Resolution:")][0]
        cmpl_line = [line for line in lines
                     if line.startswith("Completeness:")][0]
        resn = float(resn_line.split()[-1])
        cmpl = float(cmpl_line.split()[-1].replace("%", ""))
    except (IOError, OSError, IndexError, ValueError):
        # Best effort only: a missing or malformed log must not abort the
        # summary, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return float("nan"), float("nan")
    return resn, cmpl


def _write_file_summary(out, params, cells, used_files, unused_files, reasons,
                        show_isa):
    """Write the "SUMMARY" section listing used and unused files to *out*.

    Args:
        out: writable file object receiving the report.
        params: object whose ``workdir`` attribute is the base directory the
            file names are shown relative to.
        cells: mapping of file name to the cell value printed after the name.
        used_files: collection of file names that were kept.
        unused_files: collection of file names that were rejected.
        reasons: mapping of rejected file name to a reason string; files
            missing from it are reported as "unknown".
        show_isa: when true, also print the value returned by
            ``correctlp.get_ISa`` for the ``CORRECT.LP`` beside each file.
    """
    print >> out
    print >> out, " SUMMARY "
    print >> out, "========================"
    for label, files in (("Used", used_files), ("Unused", unused_files)):
        print >> out, "\n%6s %4d files:\n" % (label, len(files))
        if len(files) == 0:
            continue

        # Compute each relative path once; the longest sets the column width.
        relpaths = {f: os.path.relpath(f, params.workdir) for f in files}
        name_fmt = "%-" + str(max(len(p) for p in relpaths.values())) + "s"

        for f in files:
            cell = cells[f]
            resn, cmpl = _read_merging_stats(f)

            # The trailing commas keep all fields of a file on one line.
            if label == "Unused":  # print reason
                print >> out, "%-15s" % reasons.get(f, "unknown"),
            print >> out, name_fmt % relpaths[f], cell,
            if show_isa:
                print >> out, "ISa=%5.1f" % correctlp.get_ISa(
                    os.path.join(os.path.dirname(f), "CORRECT.LP")),
            print >> out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)


def _merge_with_xscale(params, workdir, xds_files, cells, space_group, out):
    """Run the XSCALE merging cycles and return one result row per cycle."""
    cycles = multi_merging.xscale.XscaleCycles(
        workdir,
        anomalous_flag=params.anomalous,
        d_min=params.d_min,
        d_max=params.d_max,
        reject_method=params.reject_method,
        reject_params=params.rejection,
        xscale_params=params.xscale,
        res_params=params.resolution,
        reference_file=params.reference_file,
        space_group=space_group,
        ref_mtz=(params.reference.data
                 if params.reference.copy_test_flag else None),
        out=out,
        nproc=params.nproc,
        batch_params=params.batch)

    unused_files, reasons = cycles.run_cycles(xds_files)
    used_files = set(xds_files).difference(set(unused_files))
    _write_file_summary(out, params, cells, used_files, unused_files, reasons,
                        show_isa=False)

    def overall_inner_outer(column):
        # Table columns are read as: last = overall, first = inner shell,
        # second to last = outer shell.
        return (column[-1], column[0], column[-2])

    ret = []
    for i in xrange(1, cycles.get_last_cycle_number() + 1):
        wd = os.path.join(workdir, "run_%.2d" % i)
        xscale_lp = os.path.join(wd, "XSCALE.LP")
        table = xscalelp.read_stats_table(xscale_lp)
        num_files = len(xscalelp.get_read_data(xscale_lp))
        xtriage_logfile = os.path.join(wd, "ccp4", "logfile.log")
        aniso = xds_aniso_analysis.parse_logfile(
            os.path.join(wd, "aniso.log"))
        cellinfo = cycles.cell_info_at_cycles[i]
        stats = dict(
            cmpl=overall_inner_outer(table["cmpl"]),
            redundancy=overall_inner_outer(table["redundancy"]),
            i_over_sigma=overall_inner_outer(table["i_over_sigma"]),
            r_meas=overall_inner_outer(table["r_meas"]),
            cc_half=overall_inner_outer(table["cc_half"]),
            sig_ano=overall_inner_outer(table["sig_ano"]),
            cc_ano=overall_inner_outer(table["cc_ano"]),
            drange=overall_inner_outer(table["d_range"]),
            lp=xscale_lp,
            xtriage_log=xtriage.XtriageLogfile(xtriage_logfile),
            aniso=aniso,
            lcv=cellinfo[1],
            alcv=cellinfo[2],
            dmin_est=cycles.dmin_est_at_cycles.get(i, float("nan")))
        ret.append([i, wd, num_files, stats])

    xscale_lp = os.path.join(cycles.current_working_dir(), "XSCALE.LP")
    print >> out, "\nFinal statistics:\n"
    print >> out, xscalelp.snip_stats_table(xscale_lp)

    return ret


def _merge_with_aimless(params, workdir, xds_files, cells, out):
    """Run pointless plus the aimless cycles; return a single result row."""
    worker = Pointless()
    print >> out, "\nRunning pointless"
    runinfo = worker.run_copy(hklout="pointless.mtz",
                              wdir=workdir,
                              xdsin=xds_files,
                              logout=os.path.join(workdir, "pointless.log"),
                              tolerance=30)

    # Table of file name -> Batch range (elements 1 and 2 of each run entry)
    assert len(xds_files) == len(runinfo)
    batch_info = collections.OrderedDict(
        (f, info[1:3]) for f, info in zip(xds_files, runinfo))

    cycles = multi_merging.aimless.AimlessCycles(
        workdir,
        anomalous_flag=params.anomalous,
        d_min=params.d_min,
        d_max=params.d_max,
        reject_method=params.reject_method,
        cc_cutoff=params.rejection.lpstats.pwcc.abs_cutoff,
        delta_cchalf_bin=params.rejection.delta_cchalf.bin,
        mtzin=os.path.join(workdir, "pointless.mtz"),
        batch_info=batch_info,
        out=out,
        nproc=params.nproc,
        nproc_each=params.batch.nproc_each,
        batchjobs=None)  # FIXME batchjobs
    unused_files, reasons = cycles.run_cycles(xds_files)
    used_files = set(xds_files).difference(set(unused_files))
    _write_file_summary(out, params, cells, used_files, unused_files, reasons,
                        show_isa=True)

    aimless_log = os.path.join(cycles.current_working_dir(), "aimless.log")
    print >> out, "\nFinal statistics:\n"
    print >> out, aimless.snip_summary(aimless_log)

    # Write summary
    table = aimless.read_summary(aimless_log)

    def overall_inner_outer(column):
        # The aimless summary columns are taken in order: overall, inner,
        # outer.
        return (column[0], column[1], column[2])

    stats = dict(cmpl=overall_inner_outer(table["cmpl"]),
                 redundancy=overall_inner_outer(table["redundancy"]),
                 i_over_sigma=overall_inner_outer(table["i_over_sigma"]),
                 r_meas=overall_inner_outer(table["r_meas"]),
                 cc_half=overall_inner_outer(table["cc_half"]),
                 sig_ano=(float("nan"), ) * 3,
                 cc_ano=overall_inner_outer(table["cc_ano"]))
    return [
        [
            cycles.get_last_cycle_number(),
            cycles.current_working_dir(),
            len(used_files),
            stats,
        ],
    ]


def merge_datasets(params, workdir, xds_files, cells, space_group):
    """Merge the given datasets with the program named by ``params.program``.

    The working directory is created when missing and a report is written to
    ``merge.log`` inside it.  The log file is closed before returning, also
    when the merging code raises.

    Args:
        params: parameter object; ``program`` selects ``"xscale"`` or
            ``"aimless"``, the remaining attributes are forwarded to the
            corresponding cycle runner.
        workdir: directory in which the merging runs and logs are placed.
        xds_files: input file names to merge.
        cells: mapping of file name to the cell value shown in the summary.
        space_group: forwarded to the XSCALE cycle runner; not used for
            aimless.

    Returns:
        A list of ``[cycle number, working directory, number of files,
        statistics dict]`` rows: one per cycle for xscale, a single row for
        the last cycle for aimless, and an empty list for an unknown program.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    with open(os.path.join(workdir, "merge.log"), "w") as out:
        if params.program == "xscale":
            return _merge_with_xscale(params, workdir, xds_files, cells,
                                      space_group, out)
        if params.program == "aimless":
            return _merge_with_aimless(params, workdir, xds_files, cells, out)

        print >> out, "Unknown program:", params.program
        return []
コード例 #2
0
ファイル: aimless.py プロジェクト: keitaroyam/yamtbx
    def run_cycle(self, xds_files, do_rejection=True):
        """Run one aimless cycle and, if requested, reject bad files.

        Aimless is run in ``self.workdir`` on the batch ranges that
        ``self.batch_info`` holds for ``xds_files``.  When ``do_rejection``
        is true and ``self.reject_method`` is ``"delta_cc1/2"``, files are
        dropped greedily: in every round the first candidate reported by
        ``aimless.calc_cchalf_by_removing`` is removed as long as doing so
        increases the product CC1/2 * ``nuniq`` for the selected bin.
        Removed files are appended to ``self.removed_files`` and tagged
        ``"badcc"`` in ``self.removed_reason``.  If files were removed, or
        another bin is queued in ``self.next_delta_cchalf_bin``, the method
        calls itself on the remaining files in the directory returned by
        ``self.request_next_workdir()``.

        Args:
            xds_files: input file names; each must be a key of
                ``self.batch_info``.
            do_rejection: when false, only aimless is run.

        Returns:
            None.
        """
        if len(xds_files) == 0:
            print >> self.out, "Error: no files given."
            return

        # One "RUN" line per file, numbered from 1, with its batch range.
        run_lines = []
        for run_no, f in enumerate(xds_files, 1):
            brange = self.batch_info[f]
            run_lines.append("RUN %3d BATCH %4d to %4d\n" % (run_no, brange[0], brange[1]))
        inp_str = "".join(run_lines)

        print >> self.out, "DEBUG:: running aimless with %3d files.." % len(xds_files)
        aimless.run_aimless(
            mtzin=os.path.relpath(self.mtzin, self.workdir),
            wdir=self.workdir,
            anomalous=self.anomalous_flag,
            d_min=self.d_min,
            prefix="aimless",
            add_stdin=inp_str,
        )
        aimless_log = os.path.join(self.workdir, "aimless.log")

        # XXX Aimless error handling here.

        if not do_rejection:
            return

        # Remove bad data
        remove_idxes = []

        if self.reject_method == "delta_cc1/2":
            print >> self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
            table = aimless.read_summary(aimless_log)
            # Column 0 is used for the "total" bin, column 2 otherwise.
            i_stat = 0 if self.delta_cchalf_bin == "total" else 2
            prev_cchalf = table["cc_half"][i_stat]
            prev_nuniq = table["nuniq"][i_stat]
            # file_name->idx table (index into the original xds_files)
            remaining_files = collections.OrderedDict((f, i) for i, f in enumerate(xds_files))

            for i in xrange(len(xds_files) - 1):  # if only one file, cannot proceed.
                tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)
                candidates = list(remaining_files)

                cchalf_list = aimless.calc_cchalf_by_removing(
                    wdir=tmpdir,
                    mtzin=self.mtzin,
                    batch_info=self.batch_info,
                    inpfiles=candidates,
                    anomalous_flag=self.anomalous_flag,
                    d_min=self.d_min,
                    stat_bin=self.delta_cchalf_bin,
                    nproc=self.nproc,
                    nproc_each=self.nproc_each,
                    batchjobs=self.batchjobs,
                )

                rem_idx, cc_i, nuniq_i = cchalf_list[0]  # First (largest) is worst one to remove.
                # rem_idx indexes the candidate list; map it back to xds_files.
                rem_file = candidates[rem_idx]
                rem_idx_in_org = remaining_files[rem_file]

                # Decision making by CC1/2
                print >> self.out, "DEBUG:: remove %3d if %.4f*%d > %.4f*%d" % (
                    rem_idx_in_org,
                    cc_i,
                    nuniq_i,
                    prev_cchalf,
                    prev_nuniq,
                )
                if cc_i * nuniq_i <= prev_cchalf * prev_nuniq:
                    break
                print >> self.out, "Removing idx= %3d gains CC1/2 by %.4f" % (rem_idx_in_org, cc_i - prev_cchalf)

                prev_cchalf, prev_nuniq = cc_i, nuniq_i
                remove_idxes.append(rem_idx_in_org)
                del remaining_files[rem_file]  # remove file from table
        else:
            # Fixed: this used the undefined bare name `reject_method`, which
            # raised NameError instead of logging the problem.
            print >> self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method

        if len(remove_idxes) > 0:
            print >> self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
            for i in sorted(remove_idxes):
                print >> self.out, " %.3d %s" % (i + 1, xds_files[i])
                self.removed_files.append(xds_files[i])
                self.removed_reason[xds_files[i]] = "badcc"

        # Continue rejecting only while further bins are queued.
        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            do_rejection = True
        else:
            do_rejection = False

        if do_rejection or len(remove_idxes) > 0:
            removed = set(remove_idxes)
            kept_files = [f for i, f in enumerate(xds_files) if i not in removed]
            self.workdir = self.request_next_workdir()
            self.run_cycle(kept_files, do_rejection=do_rejection)
コード例 #3
0
ファイル: multi_merge.py プロジェクト: harumome/kamo
def _read_merging_stats(xds_file):
    """Read resolution and completeness from the log next to *xds_file*.

    Looks for ``merging_stats.log`` in the directory of *xds_file* and takes
    the last whitespace-separated token of the first line starting with
    ``Resolution:`` and of the first line starting with ``Completeness:``
    (a ``%`` in the latter is removed before conversion).

    Returns:
        ``(resn, cmpl)`` as floats, or ``(nan, nan)`` when the file cannot be
        read or either value cannot be parsed.
    """
    merge_log = os.path.join(os.path.dirname(xds_file), "merging_stats.log")
    try:
        with open(merge_log) as fh:
            lines = fh.readlines()
        resn_line = [line for line in lines
                     if line.startswith("Resolution:")][0]
        cmpl_line = [line for line in lines
                     if line.startswith("Completeness:")][0]
        resn = float(resn_line.split()[-1])
        cmpl = float(cmpl_line.split()[-1].replace("%", ""))
    except (IOError, OSError, IndexError, ValueError):
        # Best effort only: a missing or malformed log must not abort the
        # summary, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return float("nan"), float("nan")
    return resn, cmpl


def _write_file_summary(out, params, cells, used_files, unused_files, reasons,
                        show_isa):
    """Write the "SUMMARY" section listing used and unused files to *out*.

    Args:
        out: writable file object receiving the report.
        params: object whose ``workdir`` attribute is the base directory the
            file names are shown relative to.
        cells: mapping of file name to the cell value printed after the name.
        used_files: collection of file names that were kept.
        unused_files: collection of file names that were rejected.
        reasons: mapping of rejected file name to a reason string; files
            missing from it are reported as "unknown".
        show_isa: when true, also print the value returned by
            ``correctlp.get_ISa`` for the ``CORRECT.LP`` beside each file.
    """
    print >>out
    print >>out, " SUMMARY "
    print >>out, "========================"
    for label, files in (("Used", used_files), ("Unused", unused_files)):
        print >>out, "\n%6s %4d files:\n" % (label, len(files))
        if len(files) == 0:
            continue

        # Compute each relative path once; the longest sets the column width.
        relpaths = {f: os.path.relpath(f, params.workdir) for f in files}
        name_fmt = "%-" + str(max(len(p) for p in relpaths.values())) + "s"

        for f in files:
            cell = cells[f]
            resn, cmpl = _read_merging_stats(f)

            # The trailing commas keep all fields of a file on one line.
            if label == "Unused": # print reason
                print >>out, "%-15s" % reasons.get(f, "unknown"),
            print >>out, name_fmt % relpaths[f], cell,
            if show_isa:
                print >>out, "ISa=%5.1f" % correctlp.get_ISa(os.path.join(os.path.dirname(f), "CORRECT.LP")),
            print >>out, "Cmpl=%3.0f%%, Resn= %.1f" % (cmpl, resn)


def merge_datasets(params, workdir, xds_files, cells, batchjobs):
    """Merge the given datasets with the program named by ``params.program``.

    The working directory is created when missing and a report is written to
    ``merge.log`` inside it.  The log file is closed before returning, also
    when the merging code raises.

    Args:
        params: parameter object; ``program`` selects ``"xscale"`` or
            ``"aimless"``, the remaining attributes are forwarded to the
            corresponding cycle runner.
        workdir: directory in which the merging runs and logs are placed.
        xds_files: input file names to merge.
        cells: mapping of file name to the cell value shown in the summary.
        batchjobs: forwarded to the cycle runner only when ``"deltacchalf"``
            is contained in ``params.batch.par_run``; otherwise ``None`` is
            passed instead.

    Returns:
        A list of ``[cycle number, working directory, number of files,
        statistics dict]`` rows: one per cycle for xscale, a single row for
        the last cycle for aimless, and an empty list for an unknown program.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    with open(os.path.join(workdir, "merge.log"), "w") as out:
        if params.program == "xscale":
            cycles = multi_merging.xscale.XscaleCycles(
                workdir,
                anomalous_flag=params.anomalous,
                d_min=params.d_min, d_max=params.d_max,
                reject_method=params.reject_method,
                reject_params=params.rejection,
                xscale_params=params.xscale,
                reference_file=params.reference_file,
                out=out, nproc=params.nproc,
                nproc_each=params.batch.nproc_each,
                batchjobs=batchjobs if "deltacchalf" in params.batch.par_run else None)
            unused_files, reasons = cycles.run_cycles(xds_files)
            used_files = set(xds_files).difference(set(unused_files))
            _write_file_summary(out, params, cells, used_files, unused_files,
                                reasons, show_isa=False)

            # Table columns are read as: last = overall, first = inner shell,
            # second to last = outer shell.
            tkvals = lambda x: (x[-1], x[0], x[-2])

            ret = []
            for i in xrange(1, cycles.get_last_cycle_number()+1):
                wd = os.path.join(workdir, "run_%.2d"%i)
                xscale_lp = os.path.join(wd, "XSCALE.LP")
                table = xscalelp.read_stats_table(xscale_lp)
                num_files = len(xscalelp.get_read_data(xscale_lp))
                xtriage_logfile = os.path.join(wd, "ccp4", "logfile.log")
                stats = dict(cmpl=tkvals(table["cmpl"]),
                             redundancy=tkvals(table["redundancy"]),
                             i_over_sigma=tkvals(table["i_over_sigma"]),
                             r_meas=tkvals(table["r_meas"]),
                             cc_half=tkvals(table["cc_half"]),
                             sig_ano=tkvals(table["sig_ano"]),
                             cc_ano=tkvals(table["cc_ano"]),
                             drange=tkvals(table["d_range"]),
                             lp=xscale_lp,
                             xtriage_log=xtriage.XtriageLogfile(xtriage_logfile))
                ret.append([i, wd, num_files, stats])

            xscale_lp = os.path.join(cycles.current_working_dir(), "XSCALE.LP")
            print >>out, "\nFinal statistics:\n"
            print >>out, xscalelp.snip_stats_table(xscale_lp)

            return ret

        if params.program == "aimless":
            worker = Pointless()
            print >>out, "\nRunning pointless"
            runinfo = worker.run_copy(hklout="pointless.mtz", wdir=workdir,
                                      xdsin=xds_files,
                                      logout=os.path.join(workdir, "pointless.log"),
                                      tolerance=30)

            # Table of file name -> Batch range (elements 1 and 2 of each run entry)
            assert len(xds_files) == len(runinfo)
            batch_info = collections.OrderedDict(
                (f, info[1:3]) for f, info in zip(xds_files, runinfo))

            cycles = multi_merging.aimless.AimlessCycles(
                workdir,
                anomalous_flag=params.anomalous,
                d_min=params.d_min, d_max=params.d_max,
                reject_method=params.reject_method,
                cc_cutoff=params.rejection.lpstats.pwcc.abs_cutoff,
                delta_cchalf_bin=params.rejection.delta_cchalf.bin,
                mtzin=os.path.join(workdir, "pointless.mtz"),
                batch_info=batch_info,
                out=out, nproc=params.nproc,
                nproc_each=params.batch.nproc_each,
                batchjobs=batchjobs if "deltacchalf" in params.batch.par_run else None)
            unused_files, reasons = cycles.run_cycles(xds_files)
            used_files = set(xds_files).difference(set(unused_files))
            _write_file_summary(out, params, cells, used_files, unused_files,
                                reasons, show_isa=True)

            aimless_log = os.path.join(cycles.current_working_dir(), "aimless.log")
            print >>out, "\nFinal statistics:\n"
            print >>out, aimless.snip_summary(aimless_log)

            # Write summary
            table = aimless.read_summary(aimless_log)

            # The aimless summary columns are taken in order: overall, inner,
            # outer.
            tkvals = lambda x: (x[0], x[1], x[2])
            stats = dict(cmpl=tkvals(table["cmpl"]),
                         redundancy=tkvals(table["redundancy"]),
                         i_over_sigma=tkvals(table["i_over_sigma"]),
                         r_meas=tkvals(table["r_meas"]),
                         cc_half=tkvals(table["cc_half"]),
                         sig_ano=(float("nan"),)*3,
                         cc_ano=tkvals(table["cc_ano"]))
            return [[cycles.get_last_cycle_number(),
                     cycles.current_working_dir(),
                     len(used_files),
                     stats], ]

        print >>out, "Unknown program:", params.program
        return []
コード例 #4
0
ファイル: aimless.py プロジェクト: robertbuecker/yamtbx
    def run_cycle(self, xds_files, do_rejection=True):
        """Run one aimless cycle, optionally rejecting files, then recurse.

        Aimless is run in ``self.workdir`` on the batch ranges that
        ``self.batch_info`` holds for ``xds_files``.  When ``do_rejection``
        is true, files chosen by the configured ``self.reject_method`` (only
        ``"delta_cc1/2"`` is supported) are appended to
        ``self.removed_files`` and tagged ``"badcc"`` in
        ``self.removed_reason``.  If files were removed, or another bin is
        queued in ``self.next_delta_cchalf_bin``, the method calls itself on
        the remaining files in the directory returned by
        ``self.request_next_workdir()``.

        Args:
            xds_files: input file names; each must be a key of
                ``self.batch_info``.
            do_rejection: when false, only aimless is run.

        Returns:
            None.
        """
        if len(xds_files) == 0:
            print >> self.out, "Error: no files given."
            return

        # One "RUN" line per file, numbered from 1, with its batch range.
        inp_str = ""
        for i, f in enumerate(xds_files):
            brange = self.batch_info[f]
            inp_str += "RUN %3d BATCH %4d to %4d\n" % (i + 1, brange[0],
                                                       brange[1])

        print >> self.out, "DEBUG:: running aimless with %3d files.." % len(
            xds_files)
        aimless.run_aimless(mtzin=os.path.relpath(self.mtzin, self.workdir),
                            wdir=self.workdir,
                            anomalous=self.anomalous_flag,
                            d_min=self.d_min,
                            prefix="aimless",
                            add_stdin=inp_str)
        aimless_log = os.path.join(self.workdir, "aimless.log")

        # XXX Aimless error handling here.

        if not do_rejection:
            return

        # Remove bad data
        if self.reject_method == "delta_cc1/2":
            remove_idxes = self._pick_rejects_by_delta_cchalf(
                xds_files, aimless_log)
        else:
            # Fixed: this used the undefined bare name `reject_method`, which
            # raised NameError instead of logging the problem.
            print >> self.out, "ERROR:: Unsupported reject_method (%s)" % self.reject_method
            remove_idxes = []

        if len(remove_idxes) > 0:
            print >> self.out, "DEBUG:: Need to remove %d files" % len(
                remove_idxes)
            for i in sorted(remove_idxes):
                print >> self.out, " %.3d %s" % (i + 1, xds_files[i])
                self.removed_files.append(xds_files[i])
                self.removed_reason[xds_files[i]] = "badcc"

        # Continue rejecting only while further bins are queued.
        if self.next_delta_cchalf_bin != []:
            self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            do_rejection = True
        else:
            do_rejection = False

        if do_rejection or len(remove_idxes) > 0:
            removed = set(remove_idxes)
            kept_files = [
                f for i, f in enumerate(xds_files) if i not in removed
            ]
            self.workdir = self.request_next_workdir()
            self.run_cycle(kept_files, do_rejection=do_rejection)

    def _pick_rejects_by_delta_cchalf(self, xds_files, aimless_log):
        """Return indices into *xds_files* whose removal improves CC1/2.

        Starting from the statistics in *aimless_log*, the first candidate
        reported by ``aimless.calc_cchalf_by_removing`` is dropped in every
        round for as long as that increases the product CC1/2 * ``nuniq``
        for ``self.delta_cchalf_bin``.  At least one file is always kept.
        """
        print >> self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
        table = aimless.read_summary(aimless_log)
        # Column 0 is used for the "total" bin, column 2 otherwise.
        i_stat = 0 if self.delta_cchalf_bin == "total" else 2
        prev_cchalf = table["cc_half"][i_stat]
        prev_nuniq = table["nuniq"][i_stat]
        # file_name->idx table (index into the original xds_files)
        remaining_files = collections.OrderedDict(
            (f, i) for i, f in enumerate(xds_files))

        remove_idxes = []
        for i in xrange(len(xds_files) -
                        1):  # if only one file, cannot proceed.
            tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)
            candidates = list(remaining_files)

            cchalf_list = aimless.calc_cchalf_by_removing(
                wdir=tmpdir,
                mtzin=self.mtzin,
                batch_info=self.batch_info,
                inpfiles=candidates,
                anomalous_flag=self.anomalous_flag,
                d_min=self.d_min,
                stat_bin=self.delta_cchalf_bin,
                nproc=self.nproc,
                nproc_each=self.nproc_each,
                batchjobs=self.batchjobs)

            # First (largest) is worst one to remove.
            rem_idx, cc_i, nuniq_i = cchalf_list[0]
            # rem_idx indexes the candidate list; map it back to xds_files.
            rem_file = candidates[rem_idx]
            rem_idx_in_org = remaining_files[rem_file]

            # Decision making by CC1/2
            print >> self.out, "DEBUG:: remove %3d if %.4f*%d > %.4f*%d" % (
                rem_idx_in_org, cc_i, nuniq_i, prev_cchalf, prev_nuniq)
            if cc_i * nuniq_i <= prev_cchalf * prev_nuniq:
                break
            print >> self.out, "Removing idx= %3d gains CC1/2 by %.4f" % (
                rem_idx_in_org, cc_i - prev_cchalf)

            prev_cchalf, prev_nuniq = cc_i, nuniq_i
            remove_idxes.append(rem_idx_in_org)
            del remaining_files[rem_file]  # remove file from table

        return remove_idxes