Example #1
def run(lstin):
    data = []
    for l in open(lstin):
        xdsasc = l.strip()
        xa = XDS_ASCII(xdsasc, sys.stdout, i_only=True)
        ma = miller.array(miller_set=xa.as_miller_set(anomalous_flag=False),
                          data=xa.iobs)
        data.append((xdsasc, ma))

    print "index filename"
    for i, d in enumerate(data):
        print i, d[0]

    print "i j n.i n.j n.common cc"
    for i in xrange(len(data)-1):
        for j in xrange(i+1, len(data)):
            di, dj = data[i][1].common_sets(data[j][1], assert_is_similar_symmetry=False)
            print i, j, data[i][1].data().size(), data[j][1].data().size(), 
            if len(di.data()) == 0:
                print 0, "nan"
            else:
                corr = flex.linear_correlation(di.data(), dj.data())
                assert corr.is_well_defined()
                cc = corr.coefficient()
                print len(di.data()), cc
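# --- Usage sketch (not from the original source): a minimal driver for run()
# above. The import locations for XDS_ASCII, miller and flex are taken from
# other examples in this collection; the __main__ guard is an assumption.
import sys
from cctbx import miller
from cctbx.array_family import flex
from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII

if __name__ == "__main__":
    run(sys.argv[1])  # lstin: text file with one XDS_ASCII.HKL path per line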
Example #2
def calc_merging_stats(xac_file, cut_resolution=True):
    import iotbx.merging_statistics
    from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII

    wdir = os.path.dirname(xac_file)
    pklout = os.path.join(wdir, "merging_stats.pkl")
    logout = open(os.path.join(wdir, "merging_stats.log"), "w")

    print >>logout, xac_file
    print >>logout, ""
    print >>logout, "Estimate cutoff"
    print >>logout, "================"

    obj = XDS_ASCII(xac_file, i_only=True)
    i_obs = obj.i_obs()
    d_min = None
    if i_obs.size() < 10: return

    try:
        cutoffs = resolution_cutoff.estimate_crude_resolution_cutoffs(i_obs=i_obs)
        cutoffs.show(out=logout)

        if cutoffs.cc_one_half_cut != float("inf") and cut_resolution:
            d_min = cutoffs.cc_one_half_cut
    except Sorry, e:
        print >>logout, e.message
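# --- Hedged sketch (not from the original source): the excerpt above imports
# iotbx.merging_statistics but is truncated before using it. A plausible
# standalone equivalent via the public cctbx API; the keyword names follow
# iotbx.merging_statistics.dataset_statistics and should be treated as
# assumptions.
import iotbx.merging_statistics
from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII

def quick_merging_stats(xac_file, n_bins=10):
    # Read intensities and compute the standard merging statistics table
    i_obs = XDS_ASCII(xac_file, i_only=True).i_obs()
    return iotbx.merging_statistics.dataset_statistics(
        i_obs=i_obs, n_bins=n_bins, anomalous=i_obs.anomalous_flag())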
Example #3
    def __init__(self,
                 xac_files,
                 d_min=3,
                 min_ios=3,
                 nproc=1,
                 max_delta=3,
                 log_out=null_out()):
        self.xac_files = xac_files
        self.log_out = log_out
        self.nproc = nproc
        self.arrays = []
        self.max_delta = max_delta
        self.best_operators = None

        print >> self.log_out, "Reading"
        for i, f in enumerate(self.xac_files):
            print >> self.log_out, "%4d %s" % (i, f)
            xac = XDS_ASCII(f, i_only=True)
            xac.remove_rejected()
            a = xac.i_obs().resolution_filter(d_min=d_min)
            if min_ios is not None:
                a = a.select(a.data() / a.sigmas() >= min_ios)
            a = a.as_non_anomalous_array().merge_equivalents(
                use_internal_variance=False).array()
            self.arrays.append(a)

        print >> self.log_out, ""
Example #4
def run(lstin):
    data = []
    for l in open(lstin):
        xdsasc = l.strip()
        xa = XDS_ASCII(xdsasc, sys.stdout, i_only=True)
        ma = miller.array(miller_set=xa.as_miller_set(anomalous_flag=False),
                          data=xa.iobs)
        data.append((xdsasc, ma))

    print "index filename"
    for i, d in enumerate(data):
        print i, d[0]

    print "i j n.i n.j n.common cc"
    for i in xrange(len(data) - 1):
        for j in xrange(i + 1, len(data)):
            di, dj = data[i][1].common_sets(data[j][1],
                                            assert_is_similar_symmetry=False)
            print i, j, data[i][1].data().size(), data[j][1].data().size(),
            if len(di.data()) == 0:
                print 0, "nan"
            else:
                corr = flex.linear_correlation(di.data(), dj.data())
                assert corr.is_well_defined()
                cc = corr.coefficient()
                print len(di.data()), cc
Example #5
def get_xac_info(xac, get_nframes=False):
    ret = {}

    for l in open(xac):
        if l.startswith(
                "!FORMAT=XDS_ASCII"
        ):  # !FORMAT=XDS_ASCII    MERGE=FALSE    FRIEDEL'S_LAW=FALSE
            ret["friedels_law"] = l[l.rindex("=") + 1:].strip()
        if l.startswith("!INCLUDE_RESOLUTION_RANGE="):
            ret["resol_range"] = l[l.index("=") + 1:].strip()
        elif l.startswith("!SPACE_GROUP_NUMBER="):
            ret["spgr_num"] = l[l.index("=") + 1:].strip()
        elif l.startswith("!UNIT_CELL_CONSTANTS="):
            ret["cell"] = l[l.index("=") + 1:].strip()
        elif l.startswith("!END_OF_HEADER"):
            break

    if not "resol_range" in ret:
        d_max_min = XDS_ASCII(xac, i_only=True).as_miller_set().d_max_min()
        ret["resol_range"] = "%.3f %.3f" % d_max_min

    if get_nframes:
        frame_range = XDS_ASCII(xac, read_data=False).get_frame_range()
        ret["nframes"] = frame_range[1] - frame_range[0]

    return ret
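# --- Usage sketch (hypothetical file name): header-only inspection with
# get_xac_info() above; get_nframes=True additionally reads the frame range.
info = get_xac_info("XDS_ASCII.HKL", get_nframes=True)
print info["spgr_num"], info["cell"], info["resol_range"], info["nframes"]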
Example #6
    def modify_xds_ascii_files(self, suffix="_reidx", cells_dat_out=None):
        #ofs_lst = open("for_merge_new.lst", "w")
        if cells_dat_out: cells_dat_out.write("file a b c al be ga\n")

        new_files = []
        print >>self.log_out, "Writing reindexed files.."
        for i, (f, op) in enumerate(zip(self.xac_files, self.best_operators)):
            xac = XDS_ASCII(f, read_data=False)
            if op.is_identity_op():
                new_files.append(f)
                if cells_dat_out:
                    cell = xac.symm.unit_cell().parameters()
                    cells_dat_out.write(f+" "+" ".join(map(lambda x:"%7.3f"%x, cell))+"\n")

                continue

            newf = f.replace(".HKL", suffix+".HKL") if ".HKL" in f else os.path.splitext(f)[0]+suffix+".HKL"
            print >>self.log_out, "%4d %s" % (i, newf)

            cell_tr = xac.write_reindexed(op, newf, space_group=self.arrays[0].crystal_symmetry().space_group())
            #ofs_lst.write(newf+"\n")
            new_files.append(newf)

            if cells_dat_out:
                cells_dat_out.write(newf+" "+" ".join(map(lambda x:"%7.3f"%x, cell_tr.parameters()))+"\n")

        return new_files
Example #7
def calc_merging_stats(xac_file, cut_resolution=True):
    import iotbx.merging_statistics
    from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII

    wdir = os.path.dirname(xac_file)
    pklout = os.path.join(wdir, "merging_stats.pkl")
    logout = open(os.path.join(wdir, "merging_stats.log"), "w")

    print >> logout, xac_file
    print >> logout, ""
    print >> logout, "Estimate cutoff"
    print >> logout, "================"

    obj = XDS_ASCII(xac_file, i_only=True)
    i_obs = obj.i_obs()
    d_min = None
    if i_obs.size() < 10: return

    try:
        cutoffs = resolution_cutoff.estimate_crude_resolution_cutoffs(
            i_obs=i_obs)
        cutoffs.show(out=logout)

        if cutoffs.cc_one_half_cut != float("inf") and cut_resolution:
            d_min = cutoffs.cc_one_half_cut
    except Sorry, e:
        print >> logout, e.message
Example #8
    def modify_xds_ascii_files(self, suffix="_reidx", cells_dat_out=None):
        #ofs_lst = open("for_merge_new.lst", "w")
        if cells_dat_out: cells_dat_out.write("file a b c al be ga\n")

        new_files = []
        print >> self.log_out, "Writing reindexed files.."
        for i, (f, op) in enumerate(zip(self.xac_files, self.best_operators)):
            xac = XDS_ASCII(f, read_data=False)
            if op.is_identity_op():
                new_files.append(f)
                if cells_dat_out:
                    cell = xac.symm.unit_cell().parameters()
                    cells_dat_out.write(
                        f + " " + " ".join(map(lambda x: "%7.3f" % x, cell)) +
                        "\n")

                continue

            newf = f.replace(
                ".HKL", suffix + ".HKL"
            ) if ".HKL" in f else os.path.splitext(f)[0] + suffix + ".HKL"
            print >> self.log_out, "%4d %s" % (i, newf)

            cell_tr = xac.write_reindexed(
                op,
                newf,
                space_group=self.arrays[0].crystal_symmetry().space_group())
            #ofs_lst.write(newf+"\n")
            new_files.append(newf)

            if cells_dat_out:
                cells_dat_out.write(newf + " " + " ".join(
                    map(lambda x: "%7.3f" % x, cell_tr.parameters())) + "\n")

        return new_files
Example #9
def reindex_with_specified_symm(topdir, reference_symm, dirs, out):
    print >>out
    print >>out,  "Re-index to specified symmetry:"
    reference_symm.show_summary(out, "  ")
    print >>out
    print >>out

    cells = {} # cell and file

    sgnum_laue = reference_symm.space_group().build_derived_reflection_intensity_group(False).type().number()

    for wd in dirs:
        print >>out, "%s:" % os.path.relpath(wd, topdir),

        # Find appropriate data
        xac_file = util.return_first_found_file(("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale", 
                                                 "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
                                                 "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
                                                wd=wd)
        if xac_file is None:
            print >>out, "Can't find XDS_ASCII file in %s" % wd
            continue

        if xac_file.endswith(".org"): xac_file_org, xac_file = xac_file, xac_file[:-4]
        else: xac_file_org = xac_file+".org"

        if not os.path.isfile(xac_file_org):
            os.rename(xac_file, xac_file_org)

        xac = XDS_ASCII(xac_file_org, read_data=False)
        print >>out, "%s %s (%s)" % (os.path.basename(xac_file), xac.symm.space_group_info(),
                                   ",".join(map(lambda x: "%.2f"%x, xac.symm.unit_cell().parameters())))

        if xac.symm.reflection_intensity_symmetry(False).space_group_info().type().number() == sgnum_laue:
            if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1, 10):
                print >>out,  "  Already scaled with specified symmetry"
                os.rename(xac_file_org, xac_file) # rename back
                cells[wd] = (numpy.array(xac.symm.unit_cell().parameters()), xac_file)
                continue

        cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20)

        if len(cosets.combined_cb_ops())==0:
            print >>out, "Can't find operator:"
            xac.symm.show_summary(out, " ")
            reference_symm.show_summary(out, " ")
            continue

        newcell = xac.write_reindexed(op=cosets.combined_cb_ops()[0],
                                      space_group=reference_symm.space_group(),
                                      hklout=xac_file)
        cells[wd] = (numpy.array(newcell.parameters()), xac_file)

        newcell = " ".join(map(lambda x: "%.3f"%x, newcell.parameters()))
        print >>out,  "  Reindexed to transformed cell: %s with %s" % (newcell, cosets.combined_cb_ops()[0].as_hkl())

    return cells
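# --- Usage sketch (hypothetical cell and paths): reindexing every dataset
# found under dirs to a chosen reference symmetry. The original module-level
# imports (os, numpy, util, reindex, XDS_ASCII) are assumed to be in scope.
import sys
from cctbx import crystal

ref = crystal.symmetry(unit_cell=(78, 78, 37, 90, 90, 90),
                       space_group_symbol="P43212")
cells = reindex_with_specified_symm("/top", ref, ["/top/ds1", "/top/ds2"],
                                    sys.stdout)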
Example #10
def run(hklin,
        hklin_merged=None,
        cone_angle=20.,
        n_bins=10,
        anomalous=None,
        do_fit=True,
        log_out=null_out()):
    if 1:
        xac = XDS_ASCII(hklin, i_only=True)
        xac.remove_rejected()
        i_obs = xac.i_obs()
    #else:
    #    import iotbx.mtz
    #    i_obs = filter(lambda x: "SIGI" in x.info().label_string(), iotbx.mtz.object(hklin).as_miller_arrays(merge_equivalents=False))[0]

    print >> log_out, "Unmerged intensity read from", hklin
    i_obs.show_summary(log_out, prefix=" ")
    print >> log_out, ""

    if anomalous is not None and i_obs.anomalous_flag() != anomalous:
        print >> log_out, "Changing anomalous flag based on user's input"
        i_obs = i_obs.customized_copy(anomalous_flag=anomalous)

    if hklin_merged is not None:
        f = iotbx.file_reader.any_file(hklin)
        array_merged = f.file_server.get_xray_data(file_name=None,
                                                   labels=None,
                                                   ignore_all_zeros=True,
                                                   parameter_scope="",
                                                   prefer_anomalous=False,
                                                   prefer_amplitudes=False)
        print >> log_out, "Merged intensity read from", hklin_merged
        array_merged.show_summary(log_out, prefix=" ")
    else:
        array_merged = i_obs.merge_equivalents(
            use_internal_variance=False).array()
        print >> log_out, "Merged intensity calculated"

    print >> log_out, ""

    bad_data = array_merged.select(
        array_merged.data() <
        -3 * array_merged.sigmas())  # FIXME What if already omitted..
    i_obs = i_obs.delete_indices(other=bad_data)

    array_merged = array_merged.select(array_merged.sigmas() > 0)

    if anomalous is not None and not anomalous and array_merged.anomalous_flag():
        print >> log_out, "Converting to non-anomalous data..\n"
        array_merged = array_merged.average_bijvoet_mates()

    return make_aniso_stats_table(i_obs, array_merged, cone_angle, n_bins,
                                  do_fit, log_out)
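# --- Usage sketch (hypothetical file name): anisotropy statistics for an
# unmerged XDS_ASCII file. make_aniso_stats_table() and the iotbx/null_out
# imports are defined elsewhere in the original module.
import sys
table = run("XDS_ASCII.HKL", cone_angle=20., n_bins=10, log_out=sys.stdout)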
Example #11
    def run_cycles(self, xds_ascii_files):
        self.all_data_root = os.path.dirname(os.path.commonprefix(xds_ascii_files))
        self.removed_files = []
        self.removed_reason = {}
        print >>self.out, "********************* START FUNCTION ***********************"
        if self.reference_file:
            self.run_cycle([self.reference_file,]+xds_ascii_files)
        else:
            self.run_cycle(xds_ascii_files)

        if self.res_params.estimate:
            #self.cut_resolution(self.get_last_cycle_number())
            for run_i in xrange(1, self.get_last_cycle_number()+1):
                try: self.estimate_resolution(run_i)
                except: print >>self.out, traceback.format_exc() # Don't want to stop the program.

        for wd in glob.glob(os.path.join(self.workdir_org, "run_*")):
            if os.path.exists(os.path.join(wd, "ccp4")): continue
            xscale_hkl = os.path.abspath(os.path.join(wd, "xscale.hkl"))
            sg = None # Use user-specified one. Otherwise follow pointless.
            try:
                sg = XDS_ASCII(xscale_hkl, read_data=False).symm.space_group()
                laue_symm_str = str(sg.build_derived_reflection_intensity_group(False).info())
                worker = Pointless()
                result = worker.run_for_symm(xdsin=xscale_hkl,
                                             logout=os.path.join(wd, "pointless.log"),
                                             choose_laue=laue_symm_str,
                                             xdsin_to_p1=True)
                
                if "symm" in result:
                    print >>self.out, "Pointless suggestion (forcing %s symmetry):" % laue_symm_str
                    result["symm"].show_summary(self.out, " ")
                    sg = str(result["symm"].space_group_info())
                else:
                    print >>self.out, "Pointless failed."
            except:
                # Don't want to stop the program.
                print >>self.out, traceback.format_exc()

            if self.space_group is not None:
                sg = str(self.space_group.info())

            try:
                xds2mtz.xds2mtz(xds_file=xscale_hkl,
                                dir_name=os.path.join(wd, "ccp4"),
                                run_xtriage=True, run_ctruncate=True,
                                with_multiplicity=True,
                                space_group=sg,
                                flag_source=self.ref_mtz)
            except:
                # Don't want to stop the program.
                print >>self.out, traceback.format_exc()

        return self.removed_files, self.removed_reason
Example #12
def read_xac_files(xac_files, d_min=None, d_max=None, min_ios=None):
    arrays = collections.OrderedDict()

    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=d_min, d_max=d_max)
        a = a.as_non_anomalous_array().merge_equivalents(use_internal_variance=False).array()
        if min_ios is not None: a = a.select(a.data()/a.sigmas()>=min_ios)
        arrays[f] = a

    return arrays
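# --- Usage sketch (hypothetical file names): read_xac_files() above also
# needs collections and XDS_ASCII in scope; the import locations follow the
# other examples in this collection.
import collections
from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII

arrays = read_xac_files(["a.HKL", "b.HKL"], d_min=3.0, min_ios=3.0)
for f, a in arrays.items():
    print f, a.size()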
Example #13
def read_xac_files(xac_files, d_min=None, d_max=None, min_ios=None):
    arrays = collections.OrderedDict()

    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=d_min, d_max=d_max)
        a = a.as_non_anomalous_array().merge_equivalents(
            use_internal_variance=False).array()
        if min_ios is not None: a = a.select(a.data() / a.sigmas() >= min_ios)
        arrays[f] = a

    return arrays
Example #14
    def read_xac_files(self, from_p1=False):
        op_to_p1 = None
        if from_p1:
            """
            This option is currently for multi_determine_symmetry.
            Do not use this for ambiguity resolution! op_to_p1 is not considered when writing new HKL files.
            """
            self.log_out.write("\nAveraging symmetry of all inputs..\n")
            cells = []
            sgs = []
            for f in self.xac_files:
                xac = XDS_ASCII(f, read_data=False)
                cells.append(xac.symm.unit_cell().parameters())
                sgs.append(xac.symm.space_group())
            assert len(set(sgs)) < 2
            avg_symm = crystal.symmetry(list(numpy.median(cells, axis=0)), space_group=sgs[0])
            op_to_p1 = avg_symm.change_of_basis_op_to_niggli_cell()
            self.log_out.write("  Averaged symmetry: %s (%s)\n" % (format_unit_cell(avg_symm.unit_cell()), sgs[0].info()))
            self.log_out.write("  Operator to Niggli cell: %s\n" % op_to_p1.as_hkl())
            self.log_out.write("        Niggli cell: %s\n" % format_unit_cell(avg_symm.unit_cell().change_basis(op_to_p1)))

        print >>self.log_out, "\nReading"
        cells = []
        bad_files, good_files = [], []
        for i, f in enumerate(self.xac_files):
            print >>self.log_out, "%4d %s" % (i, f)
            xac = XDS_ASCII(f, i_only=True)
            self.log_out.write("     d_range: %6.2f - %5.2f" % xac.i_obs().resolution_range())
            self.log_out.write(" n_ref=%6d" % xac.i_obs().size())
            xac.remove_rejected()
            a = xac.i_obs().resolution_filter(d_min=self.d_min)
            if self.min_ios is not None: a = a.select(a.data()/a.sigmas()>=self.min_ios)
            self.log_out.write(" n_ref_filtered=%6d" % a.size())
            if from_p1:
                a = a.change_basis(op_to_p1).customized_copy(space_group_info=sgtbx.space_group_info("P1"))
            a = a.as_non_anomalous_array().merge_equivalents(use_internal_variance=False).array()
            self.log_out.write(" n_ref_merged=%6d\n" % a.size())
            if a.size() < 2:
                self.log_out.write("     !! WARNING !! number of reflections is dangerously small!!\n")
                bad_files.append(f)
            else:
                self.arrays.append(a)
                cells.append(a.unit_cell().parameters())
                good_files.append(f)

        if bad_files:
            self.xac_files = good_files
            self.bad_files = bad_files

        assert len(self.xac_files) == len(self.arrays) == len(cells)
            
        print >>self.log_out, ""

        self._representative_xs = crystal.symmetry(list(numpy.median(cells, axis=0)),
                                                   space_group_info=self.arrays[0].space_group_info())
Example #15
def read_strong_i_from_xds_ascii(xds_ascii_in):
    tmp = XDS_ASCII(xds_ascii_in, i_only=True).i_obs(anomalous_flag=False)
    sel = tmp.sigmas() > 0
    sel &= tmp.data() / tmp.sigmas() > 2
    sel &= tmp.d_spacings() > 3
    if sel.count(True) < 10:
        return None
    tmp = tmp.select(sel)
    merge = tmp.merge_equivalents(use_internal_variance=False)
    return merge.array()
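# --- Usage sketch (hypothetical path): the function above returns a merged
# array of strong (I/sigma > 2), low-resolution (d > 3 A) reflections, or None
# when fewer than 10 reflections survive the filter.
strong = read_strong_i_from_xds_ascii("XDS_ASCII.HKL")
if strong is not None:
    print strong.size(), "strong reflections kept"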
Example #16
    def modify_xds_ascii_files(self, suffix="_reidx"):
        #ofs_lst = open("for_merge_new.lst", "w")
        new_files = []
        print >>self.log_out, "Writing reindexed files.."
        for i, (f, op) in enumerate(zip(self.xac_files, self.best_operators)):
            if op.is_identity_op():
                #ofs_lst.write(f+"\n")
                new_files.append(f)
                continue

            newf = f.replace(".HKL", suffix+".HKL") if ".HKL" in f else os.path.splitext(f)[0]+suffix+".HKL"
            print >>self.log_out, "%4d %s" % (i, newf)

            xac = XDS_ASCII(f, read_data=False)
            xac.write_reindexed(op, newf)
            #ofs_lst.write(newf+"\n")
            new_files.append(newf)

        return new_files
Example #17
    def __init__(self, xac_files, d_min=3, min_ios=3, nproc=1, max_delta=3, log_out=null_out()):
        self.xac_files = xac_files
        self.log_out = log_out
        self.nproc = nproc
        self.arrays = []
        self.max_delta = max_delta
        self.best_operators = None

        print >>self.log_out, "Reading"
        for i, f in enumerate(self.xac_files):
            print >>self.log_out, "%4d %s" % (i, f)
            xac = XDS_ASCII(f, i_only=True)
            xac.remove_rejected()
            a = xac.i_obs().resolution_filter(d_min=d_min)
            if min_ios is not None: a = a.select(a.data()/a.sigmas()>=min_ios)
            a = a.as_non_anomalous_array().merge_equivalents(use_internal_variance=False).array()
            self.arrays.append(a)

        print >>self.log_out, ""
Example #18
def est_resol(xscale_hkl, res_params, plt_out):
    iobs = XDS_ASCII(xscale_hkl, i_only=True).i_obs()
    est = estimate_resolution_based_on_cc_half(iobs, res_params.cc_one_half_min,
                                               res_params.cc_half_tol,
                                               res_params.n_bins, log_out=self.out)
    est.show_plot(False, plt_out)
    if None not in (est.d_min, est.cc_at_d_min):
        self.out.write("Best resolution cutoff= %.2f A @CC1/2= %.4f\n" % (est.d_min, est.cc_at_d_min))
    else:
        self.out.write("Can't decide resolution cutoff. No reflections??\n")
    return est.d_min
Example #19
    def estimate_resolution(self, cycle_number):
        print >>self.out, "**** Determining resolution cutoff in run_%.2d ****" % cycle_number
        last_wd = os.path.join(self.workdir_org, "run_%.2d"%cycle_number)
        xscale_hkl = os.path.abspath(os.path.join(last_wd, "xscale.hkl"))

        i_obs = XDS_ASCII(xscale_hkl, i_only=True).i_obs()
        d_min_est, _ = initial_estimate_byfit_cchalf(i_obs, cc_half_min=self.res_params.cc_one_half_min,
                                                 anomalous_flag=False, log_out=self.out)
                
        self.out.write("Estimated resolution cutoff= %.2f A @CC1/2= %.4f\n" % (d_min_est, self.res_params.cc_one_half_min))
        
        self.dmin_est_at_cycles[cycle_number] = d_min_est
Example #20
def read_strong_i_from_xds_ascii(xds_ascii_in):
    tmp = XDS_ASCII(xds_ascii_in, i_only=True).i_obs(anomalous_flag=False)
    sel = tmp.sigmas() > 0
    sel &= tmp.data()/tmp.sigmas() > 2
    sel &= tmp.d_spacings() > 3
    if sel.count(True) < 10:
        return None
    tmp = tmp.select(sel)
    merge = tmp.merge_equivalents(use_internal_variance=False)
    return merge.array()
Example #21
    def get_p1cell_and_symm(self, xdsdir):
        dials_hkl = os.path.join(xdsdir, "DIALS.HKL")
        xac_file = util.return_first_found_file(
            ("XDS_ASCII.HKL", "XDS_ASCII.HKL.org", "XDS_ASCII_fullres.HKL.org",
             "XDS_ASCII_fullres.HKL", "XDS_ASCII.HKL_noscale.org",
             "XDS_ASCII.HKL_noscale"),
            wd=xdsdir)

        p1cell, xs = None, None

        if xac_file:
            correct_lp = util.return_first_found_file(
                ("CORRECT.LP_noscale", "CORRECT.LP"), wd=xdsdir)
            if not correct_lp:
                print "CORRECT.LP not found in %s" % xdsdir
                return None, None
            p1cell = correctlp.get_P1_cell(correct_lp, force_obtuse_angle=True)
            try:
                xac = XDS_ASCII(xac_file, read_data=False)
            except:
                print "Invalid XDS_ASCII format:", xac_file
                return None, None
            xs = xac.symm

        elif os.path.isfile(dials_hkl):  # DIALS
            xs = run_dials_auto.get_most_possible_symmetry(xdsdir)
            if xs is None:
                print "Cannot get crystal symmetry:", xdsdir
                return None, None

            p1cell = list(xs.niggli_cell().unit_cell().parameters())
            # force obtuse angle
            tmp = map(lambda x: (x[0] + 3, abs(90. - x[1])),
                      enumerate(
                          p1cell[3:]))  # Index and difference from 90 deg
            tmp.sort(key=lambda x: x[1], reverse=True)
            if p1cell[tmp[0][0]] < 90:
                tmp = map(lambda x: (x[0] + 3, 90. - x[1]),
                          enumerate(p1cell[3:]))  # Index and 90-val.
                tmp.sort(key=lambda x: x[1], reverse=True)
                for i, v in tmp[:2]:
                    p1cell[i] = 180. - p1cell[i]

            p1cell = uctbx.unit_cell(p1cell)

        return p1cell, xs
Example #22
def run(xscale_inp):
    inp_dir = os.path.dirname(xscale_inp)

    files = map(
        lambda y: y[1].replace("*", ""),
        filter(lambda x: x[0] == "INPUT_FILE", get_xdsinp_keyword(xscale_inp)))
    files = map(
        lambda x: os.path.join(inp_dir, x)
        if not os.path.isabs(x) else x, files)
    symms = map(lambda x: XDS_ASCII(x, read_data=False).symm, files)
    cells = numpy.array(map(lambda x: x.unit_cell().parameters(), symms))
    sgs = map(lambda x: str(x.space_group_info()), symms)
    laues = map(
        lambda x: str(x.space_group().build_derived_reflection_intensity_group(
            False).info()), symms)

    median_cell = map(lambda i: numpy.median(cells[:, i]), xrange(6))
    mean_cell = map(lambda i: cells[:, i].mean(), xrange(6))
    cell_sd = map(lambda i: numpy.std(cells[:, i]), xrange(6))

    print "%4d files loaded" % len(files)
    print "Space groups:", ", ".join(
        map(lambda x: "%s (%d files)" % (x, sgs.count(x)), set(sgs)))
    print " Laue groups:", ", ".join(
        map(lambda x: "%s (%d files)" % (x, laues.count(x)), set(laues)))
    print " Median cell:", " ".join(map(lambda x: "%7.3f" % x, median_cell))
    print "   Mean cell:", " ".join(map(lambda x: "%7.3f" % x, mean_cell))
    print "          SD:", " ".join(map(lambda x: "%7.1e" % x, cell_sd))

    # for BLEND $CCP4/share/blend/R/blend0.R
    # names(macropar) <- c("cn","a","b","c","alpha","beta","gamma","mosa","ctoddist","wlength")
    ofs = open("forR_macropar.dat", "w")
    for i, cell in enumerate(cells):
        print >> ofs, "%4d" % (i + 1),
        print >> ofs, " ".join(map(lambda x: "%7.3f" % x, cell)),
        print >> ofs, " 0 0 0"
    ofs.close()

    shutil.copyfile("forR_macropar.dat", "forR_macropar.dat.bak")
    print
    print "Run BLEND?"
    print "Rscript $CCP4/share/blend/R/blend0.R"
Example #23
def decide_resolution(summarydat, params, log_out):
    best = choose_best_result(summarydat, log_out)
    if best is None:
        log_out.write("No data for deciding resolution cutoff.\n")
        return None

    log_out.write("Using %s for deciding resolution cutoff.\n" % best)
    iobs = XDS_ASCII(best, i_only=True).i_obs()  # Result with max CC1/2

    est = estimate_resolution_based_on_cc_half(iobs,
                                               params.cc_one_half_min,
                                               params.cc_half_tol,
                                               params.n_bins,
                                               log_out=log_out)
    if None not in (est.d_min, est.cc_at_d_min):
        log_out.write("Best resolution cutoff= %.2f A @CC1/2= %.4f\n" %
                      (est.d_min, est.cc_at_d_min))
    else:
        log_out.write("Can't decide resolution cutoff. No reflections??\n")
    return est.d_min
Example #24
def get_most_possible_symmetry(workdir):
    try:
        pointless_log = os.path.join(workdir, "pointless.log")
        xs = pointless.parse_pointless_output_for_symm(
            open(pointless_log).read()).get("symm")
        if xs is not None: return xs
    except:
        pass

    try:
        xs = get_crystal_symmetry_from_json(
            os.path.join(workdir, "integrated_experiments.json"))
        if xs is not None: return xs
    except:
        pass

    try:
        xac = XDS_ASCII(os.path.join(workdir, "DIALS.HKL"), read_data=False)
        return xac.symm
    except:
        pass
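# --- Usage note (hypothetical path): get_most_possible_symmetry() above falls
# through pointless.log, integrated_experiments.json and DIALS.HKL in turn and
# implicitly returns None if all fail, so callers should check for None.
xs = get_most_possible_symmetry("/path/to/workdir")
if xs is None:
    print "Cannot recover crystal symmetry"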
Example #25
        add_dataset(name="dataset", wavelength=0)

    mtz_dataset.add_miller_array(miller_array=i_obs.select(~remove_sel), column_root_label="ICUT")
    mtz_dataset.add_miller_array(miller_array=i_obs.select(remove_sel), column_root_label="IREMOVED")

    if f_obs is not None:
        mtz_dataset.add_miller_array(miller_array=f_obs.select(~remove_sel), column_root_label="FCUT")
        mtz_dataset.add_miller_array(miller_array=f_obs.select(remove_sel), column_root_label="FREMOVED")

    mtz_dataset.mtz_object().write(file_name=params.hklout)

    if params.xds_ascii is not None:
        # XXX Need to check unit cell compatibility
        from yamtbx.dataproc.xds.xds_ascii import XDS_ASCII
        from cctbx import miller
        xa = XDS_ASCII(params.xds_ascii, sys.stdout)
        miller.map_to_asu(xa.symm.space_group_info().type(), False, xa.indices)
        removed_indices = i_obs.indices().select(remove_sel)

        out = open("removed_positions.dat", "w")
        for hkl, x, y, z, i, sigi in zip(xa.indices, xa.xd, xa.yd, xa.zd, xa.iobs, xa.sigma_iobs):
            if sigi <= 0:
                print "sigi<=0", x, y, z, i, sigi
                continue
            if hkl in removed_indices:
                print >>out, x, y, z, i, sigi

    if params.hklref is not None:
        #from eval_Rfree_factors_with_common_reflections import get_flag
        from cctbx.array_family import flex
        calc_r = lambda f_obs, f_model: flex.sum(flex.abs(f_obs.data() - f_model.data())) / flex.sum(f_obs.data())
Example #26
    def run_cycle(self, xds_ascii_files, reference_idx=None):
        if len(xds_ascii_files) == 0:
            print >>self.out, "Error: no files given."
            return

        xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
        xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

        # Get averaged cell for scaling
        sg, cell, lcv, alcv = self.average_cells(xds_ascii_files)
        self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv, alcv)
        
        # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
        inp_out = open(xscale_inp, "w")
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x:len(x))
            refstr = "*" if i==reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr,tmp))
            if len(self.xscale_params.corrections) != 3:
                inp_out.write("  CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
            if self.xscale_params.frames_per_batch is not None:
                frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
                nframes = frame_range[1] - frame_range[0]
                nbatch = int(numpy.ceil(nframes / self.xscale_params.frames_per_batch))
                print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
                inp_out.write("  NBATCH= %d\n" % nbatch)

        inp_out.close()

        print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
        try:
            xscale.run_xscale(xscale_inp, cbf_to_dat=True,
                              use_tmpdir_if_available=self.xscale_params.use_tmpdir_if_available)
        except:
            print >>self.out, traceback.format_exc()

        xscale_log = open(xscale_lp).read()
        if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
            print >>self.out, "DEBUG:: Need to choose files."

            # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections.
            # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves?
            # Older versions just print correlation table and stop.
            if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
                G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
                #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
                cliques = [c for c in nx.find_cliques(G)]
                cliques.sort(key=lambda x:len(x))
                if self._counter == 1:
                    max_clique = cliques[-1]
                else:
                    idx_prevfile = 1 if self.reference_file else 0
                    max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1] # xscale.hkl must be included!

                if self.reference_file:
                    max_clique = [0,] + filter(lambda x: x!=0, max_clique)

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                try_later = map(lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes()))

                print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique),
                                                                                                      len(try_later))
                print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(G.nodes()))
                for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

                assert len(try_later) <= 0 # Never the case with newer xscale!! (if it happens, check_remove_list() should be modified to skip_num += 1)
                if len(try_later) > 0:
                    print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                    next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                    if self.reference_file: next_files = [self.reference_file,] + next_files
                    self.workdir = self.request_next_workdir()
                    self.run_cycle(next_files)
                    return
            else:
                bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
                print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                # XXX Actually, not all datasets need to be thrown.. some of them are useful..
                for i in bad_idxes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], 
                                   filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))

            return
        elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
            print >>self.out, "DEBUG:: Need to discard useless data."
            unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
            if len(unuseful_data) == 0:
                print >>self.out, "I don't know how to fix it.."
                return
            remove_idxes = map(lambda x: x[0]-1, unuseful_data)
            remove_idxes = self.check_remove_list(remove_idxes)
            keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
            for i in remove_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "useless"

            for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
            return
        elif "INACCURATE SCALING FACTORS." in xscale_log:
            # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
            print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
        elif "!!! ERROR !!!" in xscale_log:
            print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
            return

        # Re-scale by changing reference
        rescale_for = None
        if len(self.reject_method) == 0:
            rescale_for = self.reference_choice # may be None
        elif reference_idx is None:
            rescale_for = "bmed"
        
        if rescale_for is not None and len(xds_ascii_files) > 1:
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling with %s" % rescale_for
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(xds_ascii_files, reference_idx=ref_num)

        if len(self.reject_method) == 0:
            return

        # Remove bad data
        remove_idxes = []
        remove_reasons = {}

        if self.reject_method[0] == "framecc":
            print >>self.out, "Rejections based on frame CC"
            from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

            # list of [frame, n_all, n_common, cc] in the same order
            framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                                   output_dir=self.workdir,
                                                   nproc=self.nproc).values()
            if self.reject_params.framecc.method == "tukey":
                ccs = numpy.array(map(lambda x: x[3], reduce(lambda x,y:x+y,framecc)))
                ccs = ccs[ccs==ccs] # Remove nan
                q25, q75 = numpy.percentile(ccs, [25, 75])
                cc_cutoff  = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
                print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
            else:
                cc_cutoff = self.reject_params.framecc.abs_cutoff
                print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

            for i, cclist in enumerate(framecc):
                useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
                if len(useframes) == 0:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                f = xds_ascii_files[i]
                xac = XDS_ASCII(f)
                if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)))):
                    continue # All useful frames.

                sel = xac.iframe == useframes[0]
                for x in useframes[1:]: sel |= xac.iframe == x
                if sum(sel) < 10: # XXX care I/sigma
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str,useframes)),
                                                                               min(xac.iframe), max(xac.iframe),
                                                                               f)

                newf = self.request_file_modify(f)
                xac.write_selected(sel, newf)

            self.reject_method.pop(0) # Perform only once

        elif self.reject_method[0] == "lpstats":
            if "bfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
                Bs = numpy.array(map(lambda x:x[1], xscalelp.get_k_b(xscale_lp)))
                q25, q75 = numpy.percentile(Bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(Bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_B")
                        count += 1

                print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.b" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
                bs = numpy.array(map(lambda x:x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.b")
                        count += 1

                print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.ab" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
                vals = numpy.array(map(lambda x:x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, ab in enumerate(vals):
                    if ab < lowlim or ab > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.ab")
                        count += 1

                print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "rfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
                rstats = xscalelp.get_rfactors_for_each(xscale_lp)
                vals = numpy.array(map(lambda x:rstats[x][-1][1], rstats)) # Read total R-factor
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, v in enumerate(vals):
                    if v < lowlim or v > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_R")
                        count += 1

                print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "pairwise_cc" in self.reject_params.lpstats.stats:
                corrs = xscalelp.get_pairwise_correlations(xscale_lp)
                if self.reject_params.lpstats.pwcc.method == "tukey":
                    q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                    iqr = q75 - q25
                    lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                    print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
                else:
                    lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                    print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

                bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
                idx_bad = {}
                for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                    idx_bad[i] = idx_bad.get(i, 0) + 1
                    idx_bad[j] = idx_bad.get(j, 0) + 1

                idx_bad = idx_bad.items()
                idx_bad.sort(key=lambda x:x[1])
                count = 0
                for idx, badcount in reversed(idx_bad):
                    remove_idxes.append(idx-1)
                    remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                    bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                    if len(bad_corrs) == 0: break
                    fun_key = lambda x: x[3]
                    print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx, 
                                                                                                   min(bad_corrs,key=fun_key)[3],
                                                                                                   max(bad_corrs,key=fun_key)[3],
                                                                                                   len(bad_corrs))
                    count += 1
                print >>self.out, " %4d pairwise CC outliers removed" % count

            self.reject_method.pop(0) # Perform only once
        elif self.reject_method[0] == "delta_cc1/2":
            print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
            table = xscalelp.read_stats_table(xscale_lp)
            i_stat = -1 if self.delta_cchalf_bin == "total" else -2
            prev_cchalf = table["cc_half"][i_stat]
            prev_nuniq = table["nuniq"][i_stat]
            # file_name->idx table
            remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

            # For consistent resolution limit
            inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
            count = 0
            for i in xrange(len(xds_ascii_files)-1): # if only one file, cannot proceed.
                tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

                cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                             inpfiles=remaining_files.keys(),
                                                             stat_bin=self.delta_cchalf_bin,
                                                             nproc=self.nproc,
                                                             nproc_each=self.nproc_each,
                                                             batchjobs=self.batchjobs)

                rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
                rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]
                
                # Decision making by CC1/2
                print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org, 
                                                                                          cc_i, nuniq_i,
                                                                                          prev_cchalf, prev_nuniq)
                if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
                print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

                prev_cchalf, prev_nuniq = cc_i, nuniq_i
                remove_idxes.append(rem_idx_in_org)
                remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
                del remaining_files[remaining_files.keys()[rem_idx]] # remove file from table
                count += 1

            print >>self.out, " %4d removed by DeltaCC1/2 method" % count

            if self.next_delta_cchalf_bin != []:
                self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            else:
                self.reject_method.pop(0)
        else:
            print >>self.out, "ERROR:: Unsupported reject_method (%s)" % reject_method

        # Remove duplicates
        remove_idxes = list(set(remove_idxes))
        remove_idxes = self.check_remove_list(remove_idxes)
        if len(remove_idxes) > 0:
            print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
            for i in sorted(remove_idxes): 
                print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

        # Next run
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        if len(self.reject_method) > 0 or len(remove_idxes) > 0:
            self.workdir = self.request_next_workdir()
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        elif self.reference_choice is not None and len(keep_idxes) > 1:
            # Just re-scale with B reference
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling2 with %s" % self.reference_choice
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
Example #27
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >>out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try: html_report.add_params(params, master_params_str)
    except: print >>out, traceback.format_exc()

    xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin))
    xds_ascii_files = filter(lambda x: x!="" and os.path.isfile(x), xds_ascii_files)
    xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files)

    cells = collections.OrderedDict()
    laues = {} # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            try:
                symm = any_reflection_file(xac).as_miller_arrays()[0].crystal_symmetry()
            except:
                print >>out, "Error in reading %s" % xac
                print >>out, traceback.format_exc()
                return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue),{}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]: print >>out, "  %s" % f
                print >>out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >>out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (params.space_group, laues.keys()[0])
            return
    else:
        tmp = sgtbx.space_group_info(laues.values()[0].keys()[0]).group().build_derived_reflection_intensity_group(True)
        print >>out, "Space group for merging:", tmp.info()
            
    try: html_report.add_cells_and_files(cells, laues.keys()[0])
    except: print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)            
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)            
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV,clh))
        print >>out
        try: html_report.add_clutering_result(clusters, "blend")
        except: print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min if params.cc_clustering.d_min is not None else params.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)            
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)            
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(clusters) > params.max_clusters:
            print >>out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"),float("nan"),clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "cc_clustering")
        except: print >>out, traceback.format_exc()
        
    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH   LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso  \n")

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0],
                                  stats["redundancy"][0],
                                  stats["i_over_sigma"][0],
                                  stats["r_meas"][0],
                                  stats["cc_half"][0],
                                  stats["cmpl"][2],
                                  stats["redundancy"][2],
                                  stats["i_over_sigma"][2],
                                  stats["r_meas"][2],
                                  stats["cc_half"][2],
                                  stats["cmpl"][1],
                                  stats["redundancy"][1],
                                  stats["i_over_sigma"][1],
                                  stats["r_meas"][1],
                                  stats["cc_half"][1],
                                  stats["sig_ano"][1],
                                  stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  stats["xtriage_log"].anisotropy,
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, space_group, batchjobs), open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ofs = open("result.pkl","w"); \
ret = merge_datasets(*args); \
pickle.dump(ret, ofs); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            # Last lcv & alcv
            try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, space_group, batchjobs)
            
            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, lcv, alcv, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()

    try: html_report.write_html()
    except: print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")
    return
Example #28
0
def xds2shelx(xds_file,
              dir_name,
              prefix=None,
              dmin=None,
              dmax=None,
              force_anomalous=False,
              space_group=None,
              flag_source=None,
              add_flag=False):
    if prefix is None:
        prefix = os.path.splitext(os.path.basename(xds_file))[0]

    hklout = prefix + ".hkl"

    # if the output file already exists, raise an error.
    if os.path.isfile(os.path.join(dir_name, hklout)):
        raise Exception("%s already exists." % os.path.join(dir_name, hklout))

    # read header
    xac = XDS_ASCII(xds_file, read_data=False)

    wavelength = xac.wavelength
    if wavelength is None:
        if xac.input_files:
            wavelength = float(xac.input_files.values()[0][1])
        else:
            wavelength = 1.0

    anom_flag = xac.anomalous
    if force_anomalous: anom_flag = True

    sginfo_org = xac.symm.space_group_info()

    if space_group:
        sginfo = sgtbx.space_group_info(space_group)
    else:
        sginfo = sginfo_org

    sg = sginfo.group()

    # make output directory
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)

    logout = open(os.path.join(dir_name, "xds2shelx.log"), "w")
    print >> logout, "xds2shelx.py running in %s" % os.getcwd()
    print >> logout, "output directory: %s" % dir_name
    print >> logout, "original file: %s" % xds_file
    print >> logout, "flag_source: %s" % flag_source
    print >> logout, "space group: %s (original=%s, requested space_group=%s)" % (
        sginfo, sginfo_org, space_group)
    if sginfo_org.group().build_derived_reflection_intensity_group(
            False) != sg.build_derived_reflection_intensity_group(False):
        print >> logout, "  WARNING!! specified space group is incompatible with original file (%s)." % sginfo_org
    print >> logout, "anomalous: %s (original=%s force_anomalous=%s)" % (
        anom_flag, xac.anomalous, force_anomalous)
    print >> logout, ""
    logout.flush()

    if sg.is_centric() and not sg.is_origin_centric():
        print >> logout, "Error: in shelx, the origin must lie on a center of symmetry."
        logout.flush()
        return
    ##
    if not os.path.exists(os.path.join(dir_name, "original")):
        os.symlink(xds_file, os.path.join(dir_name, "original"))

    ##
    # prepare XDSCONV.INP and run
    #

    with open(os.path.join(dir_name, "XDSCONV.INP"), "w") as ofs:
        ofs.write("OUTPUT_FILE=%s SHELX\n" % hklout)
        ofs.write("INPUT_FILE=original\n")
        ofs.write("MERGE= FALSE\n")
        ofs.write("FRIEDEL'S_LAW= %s\n" % ("FALSE" if anom_flag else "TRUE"))

        if None not in (dmin, dmax):
            ofs.write("INCLUDE_RESOLUTION_RANGE= %s %s\n" % (dmax, dmin))

    call(cmd="xdsconv",
         wdir=dir_name,
         expects_in=["original"],
         expects_out=[hklout],
         stdout=logout)

    cell_str = xtal.format_unit_cell(xac.symm.unit_cell(),
                                     lfmt="%8.4f",
                                     afmt="%7.3f")
    with open(os.path.join(dir_name, "%s.ins" % prefix), "w") as ofs:
        ofs.write("CELL %.4f %s\n" % (wavelength, cell_str))
        ofs.write("ZERR 1 0 0 0 0 0 0\n")
        ofs.write("LATT %s\n" % xtal.shelx_latt(sg))
        for iop in range(1, sg.n_smx()):
            ofs.write("SYMM %s\n" % sg(iop).as_xyz(
                decimal=True, t_first=True, symbol_letters="XYZ"))
        ofs.write("SFAC C N O S\n")
        ofs.write("UNIT 6 6 6 6\n")
        ofs.write("FIND 10\n")  # TODO more intelligent
        ofs.write("NTRY 1000\n")
        ofs.write("HKLF 4\n")
        ofs.write("END\n")
Example #29
0
        print "altered:", idxes

    ksb.assign_operators([debug_op, sgtbx.change_of_basis_op("h,k,l")])
    print "right?:", [
        i for i, x in enumerate(ksb.best_operators) if not x.is_identity_op()
    ]
    #ksb.debug_write_mtz()
    #ksb.modify_xds_ascii_files()

    quit()

    arrays = []
    for f in xac_files:
        print "Reading", f
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=3)
        a = a.merge_equivalents(use_internal_variance=False).array()
        arrays.append(a)

    symm = arrays[0].crystal_symmetry()
    cosets = reindex.reindexing_operators(symm, symm)
    reidx_ops = cosets.combined_cb_ops()
    reidx_ops.sort(key=lambda x: not x.is_identity_op())
    print " Possible reindex operators:", map(lambda x: str(x.as_hkl()),
                                              reidx_ops)

    determined = set([
        0,
    ])
Example #30
0
def reindex_with_specified_symm_worker(wd,
                                       wdr,
                                       topdir,
                                       log_out,
                                       reference_symm,
                                       sgnum_laue,
                                       prep_dials_files=False):
    """
    wd: directory where the XDS file exists.
    wdr: wd to return; the directory where the transformed file should be saved.

    If wd != wdr, files in wd/ are left unchanged during the procedure.
    Multiprocessing is unsafe when wd == wdr.
    """

    out = StringIO()
    print >> out, "%s:" % os.path.relpath(wd, topdir),

    # Find appropriate data
    xac_file = util.return_first_found_file(
        ("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
         "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
         "XDS_ASCII.HKL.org", "XDS_ASCII.HKL", "DIALS.HKL.org", "DIALS.HKL"),
        wd=wd)
    if xac_file is None:
        print >> out, "Can't find XDS_ASCII file in %s" % wd
        log_out.write(out.getvalue())
        log_out.flush()
        return (wdr, None)

    if xac_file.endswith(".org"): xac_file_out = xac_file[:-4]
    else: xac_file_out = xac_file

    xac = XDS_ASCII(xac_file, read_data=False)
    print >> out, "%s %s (%s)" % (os.path.basename(xac_file),
                                  xac.symm.space_group_info(), ",".join(
                                      map(lambda x: "%.2f" % x,
                                          xac.symm.unit_cell().parameters())))

    if xac.symm.reflection_intensity_symmetry(
            False).space_group_info().type().number() == sgnum_laue:
        if xac.symm.unit_cell().is_similar_to(
                reference_symm.unit_cell(), 0.1,
                10):  # XXX Check unit cell consistency!!
            print >> out, "  Already scaled with specified symmetry"
            log_out.write(out.getvalue())
            log_out.flush()

            if wd != wdr: shutil.copy2(xac_file, wdr)

            if prep_dials_files and "DIALS.HKL" not in xac_file:
                prepare_dials_files(wd, out, moveto=wdr)

            return (wdr, (numpy.array(xac.symm.unit_cell().parameters()),
                          os.path.join(wdr, os.path.basename(xac_file))))

    cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2,
                                          20)  # XXX ISN'T THIS TOO LARGE?

    if len(cosets.combined_cb_ops()) == 0:
        print >> out, "Can't find operator:"
        xac.symm.show_summary(out, " ")
        reference_symm.show_summary(out, " ")
        log_out.write(out.getvalue())
        log_out.flush()
        return (wdr, None)

    if wd == wdr:
        dest = tempfile.mkdtemp(prefix="multiprep", dir=wd)
    else:
        dest = wdr

    hklout = os.path.join(dest, os.path.basename(xac_file_out))

    newcell = xac.write_reindexed(op=cosets.combined_cb_ops()[0],
                                  space_group=reference_symm.space_group(),
                                  hklout=hklout)

    if "DIALS.HKL" in os.path.basename(xac_file):
        outstr = 'output.experiments="%sreindexed_experiments.json" ' % os.path.join(
            dest, "")
        outstr += 'output.reflections="%sreindexed_reflections.pickle" ' % os.path.join(
            dest, "")
        for f in ("experiments.json", "indexed.pickle"):
            if not os.path.isfile(os.path.join(os.path.dirname(xac_file), f)):
                continue
            util.call(
                'dials.reindex %s change_of_basis_op=%s space_group="%s" %s' %
                (f, cosets.combined_cb_ops()[0].as_abc(),
                 reference_symm.space_group_info(), outstr),
                wdir=os.path.dirname(xac_file))
    elif prep_dials_files:
        prepare_dials_files(wd,
                            out,
                            space_group=reference_symm.space_group(),
                            reindex_op=cosets.combined_cb_ops()[0],
                            moveto=dest)

    newcell_str = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
    print >> out, "  Reindexed to transformed cell: %s with %s" % (
        newcell_str, cosets.combined_cb_ops()[0].as_hkl())
    log_out.write(out.getvalue())
    log_out.flush()

    if wd == wdr:
        for f in glob.glob(os.path.join(dest, "*")):
            f_in_wd = os.path.join(wd, os.path.basename(f))
            if os.path.exists(f_in_wd) and not os.path.exists(f_in_wd +
                                                              ".org"):
                os.rename(f_in_wd, f_in_wd + ".org")
            os.rename(f, f_in_wd)

        shutil.rmtree(dest)
        ret = (numpy.array(newcell.parameters()),
               os.path.join(wd, os.path.basename(xac_file_out)))
    else:
        ret = (numpy.array(newcell.parameters()), hklout)

    return (wdr, ret)
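
A minimal, hypothetical serial driver for the worker above; paths and symmetry values are illustrative. Running serially side-steps the wd == wdr multiprocessing caveat noted in the docstring:

import sys
from cctbx import crystal

topdir = "/path/to/processed"        # illustrative layout
dirs = ["run_001", "run_002"]        # subdirectories holding the XDS files
reference_symm = crystal.symmetry((78.0, 78.0, 37.0, 90.0, 90.0, 90.0),
                                  space_group_symbol="P 43 21 2")
sgnum_laue = reference_symm.space_group() \
    .build_derived_reflection_intensity_group(False).type().number()

results = {}
for wd in dirs:
    wdr, ret = reindex_with_specified_symm_worker(wd, wd, topdir, sys.stdout,
                                                  reference_symm, sgnum_laue)
    results[wdr] = ret   # None on failure, else (cell parameters, HKL path)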
Example #31
0
def run(params, log_out):
    xa = XDS_ASCII(params.xds_ascii, log_out)
    rejected_array = miller.array(miller_set=miller.set(
        crystal_symmetry=xa.symm, indices=xa.indices, anomalous_flag=False),
                                  data=xa.sigma_iobs < 0)
    xa_zd = miller.array(miller_set=miller.set(crystal_symmetry=xa.symm,
                                               indices=xa.indices,
                                               anomalous_flag=False),
                         data=xa.zd)

    # Read ZCAL, not ZOBS, because ZOBS (and XOBS, YOBS) can be zero when the reflection is unobserved.
    integ_data = integrate_hkl_as_flex.reader(params.integrate_hkl,
                                              ["MAXC", "ZCAL"]).arrays()
    maxc_array, integ_zcal = integ_data["MAXC"], integ_data["ZCAL"]

    assert integ_zcal.unit_cell().is_similar_to(
        xa_zd.unit_cell())  # two set of indices should be comparable.

    overload_flags = maxc_array.customized_copy(
        data=maxc_array.data() == params.overload)
    print "Overloaded observations in INTEGRATE.HKL:", overload_flags.data(
    ).count(True)
    print "Rejected (sigma<0) observations in XDS_ASCII.HKL:", rejected_array.data(
    ).count(True)
    # common_sets() does not work correctly for unmerged data!

    rejected_zd = xa_zd.select(rejected_array.data())

    #reject_indices = flex.bool([False for i in xrange(overload_flags.size())])

    print "making indices..........."
    import yamtbx_utils_ext
    integ_zcal = integ_zcal.sort(
        by_value="packed_indices"
    )  # Must be sorted before C++ function below!!
    reject_indices = yamtbx_utils_ext.make_selection_for_xds_unmerged(
        rejected_zd.indices(), rejected_zd.data(), integ_zcal.indices(),
        integ_zcal.data(), 3.)
    """
    # This loop is too slow!
    for i in xrange(rejected_zd.size()):
        sel = integ_zcal.indices() == rejected_zd.indices()[i]
        sel &= (integ_zcal.data() - rejected_zd.data()[i]) < 3
        reject_indices.set_selected(sel, True)
        print i, rejected_zd.size(), sel.count(True)
        """
    """
    # This loop is also too slow!
    for j in xrange(integ_zcal.size()): # j: INTEGRATE.HKL
        if rejected_zd.indices()[i] != integ_zcal.indices()[j]:
            continue
        if abs(rejected_zd.data()[i] - integ_zcal.data()[j]) < 3: # within 3 frames.. OK?
            reject_indices[j] = True
    """

    print "Found rejected observations in INTEGRATE.HKL:", reject_indices.count(
        True)
    overload_flags.data().set_selected(reject_indices,
                                       False)  # Set 'Un-overloaded'
    print "Remained overloaded observations:", overload_flags.data().count(
        True)

    overload_flags_partial = overload_flags.map_to_asu().merge_equivalents(
        incompatible_flags_replacement=True).array()
    overload_flags_all = overload_flags.map_to_asu().merge_equivalents(
        incompatible_flags_replacement=False).array()

    mtz_object = iotbx.mtz.object(params.hklin). \
        add_crystal("crystal", "project", overload_flags_all.unit_cell()). \
        add_dataset(name="dataset", wavelength=0). \
        add_miller_array(miller_array=overload_flags_all, column_root_label="SATURATED_ALL"). \
        add_miller_array(miller_array=overload_flags_partial, column_root_label="SATURATED_PART"). \
        mtz_object()
    mtz_object.write(file_name=params.hklout)
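
For orientation, run() above reads only five fields from params; a minimal, hypothetical stand-in (file names and the overload value are illustrative, not defaults of the tool):

import argparse, sys

params = argparse.Namespace(xds_ascii="XDS_ASCII.HKL",      # unmerged XDS output
                            integrate_hkl="INTEGRATE.HKL",
                            overload=1048500,               # detector saturation count (illustrative)
                            hklin="input.mtz",              # MTZ supplying the base object
                            hklout="with_sat_flags.mtz")    # gets SATURATED_ALL/_PART columns
run(params, sys.stdout)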
Example #32
0
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    # Check parameters
    if params.program == "xscale":
        if (params.xscale.frames_per_batch,
                params.xscale.degrees_per_batch).count(None) == 0:
            print "ERROR! You can't specify both of xscale.frames_per_batch and xscale.degrees_per_batch"
            return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal(max_parallel=params.batch.sh_max_jobs)
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log",
                 open(os.path.join(params.workdir, "multi_merge.log"), "w"),
                 atexit_send_to=None)
    out.register("stdout", sys.stdout)
    out.write("kamo.multi_merge started at %s\n\n" %
              time.strftime("%Y-%m-%d %H:%M:%S"))
    time_started = time.time()

    print >> out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out,
                                                             prefix=" ")
    print >> out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(
        os.path.abspath(params.workdir))
    try:
        html_report.add_params(params, master_params_str)
    except:
        print >> out, traceback.format_exc()

    xds_ascii_files = util.read_path_list(params.lstin,
                                          only_exists=True,
                                          as_abspath=True,
                                          err_out=out)

    if not xds_ascii_files:
        print >> out, "ERROR! Cannot find (existing) files in %s." % params.lstin
        return

    if len(xds_ascii_files) < 2:
        print >> out, "ERROR! Only one file in %s." % params.lstin
        print >> out, "       Give at least two files for merging."
        return

    cells = collections.OrderedDict()
    laues = {}  # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >> out, "Error in reading %s" % xac
            print >> out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(
            False).info()
        laues.setdefault(str(laue), {}).setdefault(
            symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >> out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >> out, " SPACE_GROUP_NUMBER= %d (%d data)" % (
                    sg, len(laues[laue][sg]))
                for f in laues[laue][sg]:
                    print >> out, "  %s" % f
                print >> out, ""
        return

    space_group = None
    if params.space_group is not None:
        space_group = sgtbx.space_group_info(params.space_group).group()
        laue_given = str(
            space_group.build_derived_reflection_intensity_group(False).info())
        if laue_given != laues.keys()[0]:
            print >> out, "ERROR! user-specified space group (space_group=%s) is not compatible with input files (%s)" % (
                params.space_group, laues.keys()[0])
            return

        sg_refset = space_group.info().as_reference_setting().group()
        if space_group != sg_refset:
            print >> out, "Sorry! currently space group in non-reference setting is not supported."
            print >> out, "(You requested %s, which is different from reference setting: %s)" % (
                space_group.info(), sg_refset.info())
            return
    else:
        tmp = sgtbx.space_group_info(
            laues.values()[0].keys()
            [0]).group().build_derived_reflection_intensity_group(True)
        print >> out, "Space group for merging:", tmp.info()

    test_flag_will_be_transferred = False

    if params.reference.data is not None:
        params.reference.data = os.path.abspath(params.reference.data)
        print >> out, "Reading reference data file: %s" % params.reference.data

        tmp = iotbx.file_reader.any_file(params.reference.data,
                                         force_type="hkl",
                                         raise_sorry_if_errors=True)
        if params.reference.copy_test_flag:
            from yamtbx.dataproc.command_line import copy_free_R_flag
            if None in copy_free_R_flag.get_flag_array(
                    tmp.file_server.miller_arrays, log_out=out):
                print >> out, " Warning: no test flag found in reference file (%s)" % params.reference.data
            else:
                test_flag_will_be_transferred = True
                print >> out, " test flag will be transferred"

        if space_group is not None:
            if space_group != tmp.file_server.miller_arrays[0].space_group():
                print >> out, " ERROR! space_group=(%s) and that of reference.data (%s) do not match." % (
                    space_group.info(),
                    tmp.file_server.miller_arrays[0].space_group_info())
                return
        else:
            space_group = tmp.file_server.miller_arrays[0].space_group()
            print >> out, " space group for merging: %s" % space_group.info()

    if params.add_test_flag:
        if test_flag_will_be_transferred:
            print >> out, "Warning: add_test_flag=True was set, but the flag will be transferred from the reference file given."
        else:
            from cctbx import r_free_utils

            med_cell = numpy.median(cells.values(), axis=0)
            d_min = max(
                params.d_min - 0.2, 1.0
            ) if params.d_min is not None else 1.5  # to prevent infinite set
            sg = space_group
            if not sg:
                sg = sgtbx.space_group_info(
                    laues.values()[0].keys()
                    [0]).group().build_derived_reflection_intensity_group(True)
            tmp = miller.build_set(crystal.symmetry(tuple(med_cell),
                                                    space_group=sg),
                                   False,
                                   d_min=d_min,
                                   d_max=None)
            print >> out, "Generating test set using the reference symmetry:"
            crystal.symmetry.show_summary(tmp, out, " ")
            tmp = tmp.generate_r_free_flags(fraction=0.05,
                                            max_free=None,
                                            lattice_symmetry_max_delta=5.0,
                                            use_lattice_symmetry=True,
                                            n_shells=20)
            tmp.show_r_free_flags_info(out=out, prefix=" ")
            tmp = tmp.customized_copy(
                data=r_free_utils.export_r_free_flags_for_ccp4(
                    flags=tmp.data(), test_flag_value=True))

            mtz_object = tmp.as_mtz_dataset(
                column_root_label="FreeR_flag").mtz_object()
            test_flag_mtz = os.path.abspath(
                os.path.join(params.workdir, "test_flag.mtz"))
            mtz_object.write(file_name=test_flag_mtz)

            # Override the parameters
            params.reference.copy_test_flag = True
            params.reference.data = test_flag_mtz

    try:
        html_report.add_cells_and_files(cells, laues.keys()[0])
    except:
        print >> out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >> out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >> out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir,
                                             d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(
            out=open(summary_out, "w"))
        print >> out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl,
                              clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl,
                              clusters)
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun,
                              clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun,
                              clusters)
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV,
                              clusters)

        if params.max_clusters is not None and len(
                clusters) > params.max_clusters:
            print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (
                params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >> out, "With specified conditions, following %d clusters will be merged:" % len(
                clusters)
        else:
            print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >> out, "Please change criteria of completeness or redundancy"
            print >> out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >> out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters:  # process largest first
            print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (
                clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir,
                                                "cluster_%.4d" % clno),
                                   map(lambda x: blend_clusters.files[x - 1],
                                       IDs), LCV, aLCV, clh))
        print >> out
        try:
            html_report.add_clutering_result(clusters, "blend")
        except:
            print >> out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(
            ccc_wdir,
            xds_ascii_files,
            d_min=params.cc_clustering.d_min
            if params.cc_clustering.d_min is not None else params.d_min,
            min_ios=params.cc_clustering.min_ios)
        print >> out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(
            nproc=params.cc_clustering.nproc,
            b_scale=params.cc_clustering.b_scale,
            use_normalized=params.cc_clustering.use_normalized,
            cluster_method=params.cc_clustering.method,
            distance_eqn=params.cc_clustering.cc_to_distance,
            min_common_refs=params.cc_clustering.min_common_refs,
            html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min,
                                                    out=open(summary_out, "w"))
        print >> out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl,
                              clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl,
                              clusters)
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun,
                              clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(
                lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(
                lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        if params.max_clusters is not None and len(
                clusters) > params.max_clusters:
            print >> out, "Only first %d (/%d) clusters will be merged (as specified by max_clusters=)" % (
                params.max_clusters, len(clusters))
            clusters = clusters[:params.max_clusters]

        if clusters:
            print >> out, "With specified conditions, following %d clusters will be merged:" % len(
                clusters)
        else:
            print >> out, "\nERROR: No clusters satisfied the specified conditions for merging!"
            print >> out, "Please change criteria of completeness or redundancy"
            print >> out, "Here is the table of completeness and redundancy for each cluster:\n"
            print >> out, open(summary_out).read()

        for clno, IDs, clh, cmpl, redun, acmpl, aredun, ccmean, ccmin in clusters:  # process largest first
            print >> out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f CCmean=% .4f CCmin=% .4f" % (
                clno, len(IDs), clh, cmpl, redun, acmpl, aredun, ccmean, ccmin)
            data_for_merge.append((os.path.join(params.workdir,
                                                "cluster_%.4d" % clno),
                                   map(lambda x: xds_ascii_files[x - 1],
                                       IDs), float("nan"), float("nan"), clh))
        print >> out

        try:
            html_report.add_clutering_result(clusters, "cc_clustering")
        except:
            print >> out, traceback.format_exc()

    else:
        data_for_merge.append((os.path.join(params.workdir,
                                            "all_data"), xds_ascii_files,
                               float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"),
                       "w")
    ofs_summary.write(
        "# d_min= %.3f A\n" %
        (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write(
        "     cluster    ClH  LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso.bst Aniso.wst dmin.est\n"
    )

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files,
                          stats):
        tmps = "%12s %6.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %9.2f %9.2f %.2f\n"
        ofs_summary.write(tmps % (
            os.path.relpath(workdir, params.workdir),
            clh,
            LCV,
            aLCV,
            cycle,
            len(xds_files),
            num_files,
            stats["cmpl"][0],
            stats["redundancy"][0],
            stats["i_over_sigma"][0],
            stats["r_meas"][0],
            stats["cc_half"][0],
            stats["cmpl"][2],
            stats["redundancy"][2],
            stats["i_over_sigma"][2],
            stats["r_meas"][2],
            stats["cc_half"][2],
            stats["cmpl"][1],
            stats["redundancy"][1],
            stats["i_over_sigma"][1],
            stats["r_meas"][1],
            stats["cc_half"][1],
            stats["sig_ano"][1],
            stats["cc_ano"][1],
            stats["xtriage_log"].wilson_b,
            #stats["xtriage_log"].anisotropy,
            stats["aniso"]["d_min_best"],
            stats["aniso"]["d_min_worst"],
            stats["dmin_est"],
        ))
        ofs_summary.flush()

    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells,
                         space_group),
                        open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
cd "%s" || exit 1
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % (os.path.abspath(workdir), sys.executable))
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir,
                                                        "result.pkl")))
            except:
                print >> out, "Error in unpickling result in %s" % workdir
                print >> out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" %
                                  os.path.relpath(workdir, params.workdir))

            lcv, alcv = float("nan"), float("nan")
            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files,
                                  num_files, stats)

            # Last lcv & alcv
            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv,
                                             xds_files, results[-1][2],
                                             results[-1][3])
            except:
                print >> out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >> out, "Merging %s..." % os.path.relpath(
                workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells,
                                     space_group)

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" %
                                  os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                lcv, alcv = stats.get("lcv", LCV), stats.get("alcv", aLCV)
                write_ofs_summary(workdir, cycle, clh, lcv, alcv, xds_files,
                                  num_files, stats)

            try:
                html_report.add_merge_result(workdir, clh, lcv, alcv,
                                             xds_files, results[-1][2],
                                             results[-1][3])
            except:
                print >> out, traceback.format_exc()

    try:
        html_report.write_html()
    except:
        print >> out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")

    out.write("\nNormal exit at %s\n" % time.strftime("%Y-%m-%d %H:%M:%S"))
    out.write("Total wall-clock time: %.2f sec.\n" %
              (time.time() - time_started))

    return
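
The batch branch above hands arguments to each job through args.pkl and collects result.pkl afterwards; a stripped-down sketch of that round trip, where merge_job() is a hypothetical stand-in for merge_datasets():

import pickle

def merge_job(workdir, files):
    # pretend result: a list of (cycle, wd, num_files, stats) tuples
    return [(1, workdir, len(files), {"cmpl": [100.0, 99.0, 98.0]})]

pickle.dump(("cluster_0001", ["a.HKL", "b.HKL"]),
            open("args.pkl", "w"), -1)                   # parent process

args = pickle.load(open("args.pkl"))                     # inside the batch job
pickle.dump(merge_job(*args), open("result.pkl", "w"))   # read back by the parent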
Example #33
0
        for i in idxes[:len(ksb.arrays)//2]:
            ksb.arrays[i] = ksb.arrays[i].customized_copy(indices=debug_op.apply(ksb.arrays[i].indices()))

        print "altered:", idxes

    ksb.assign_operators([debug_op, sgtbx.change_of_basis_op("h,k,l")])
    print "right?:", [i for i, x in enumerate(ksb.best_operators) if not x.is_identity_op()]
    #ksb.debug_write_mtz()
    #ksb.modify_xds_ascii_files()

    quit()

    arrays = []
    for f in xac_files:
        print "Reading", f
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=3)
        a = a.merge_equivalents(use_internal_variance=False).array()
        arrays.append(a)

    symm = arrays[0].crystal_symmetry()
    cosets = reindex.reindexing_operators(symm, symm)
    reidx_ops = cosets.combined_cb_ops()
    reidx_ops.sort(key=lambda x: not x.is_identity_op())
    print " Possible reindex operators:", map(lambda x: str(x.as_hkl()), reidx_ops)

    determined = set([0,])
    old_ops = map(lambda x:0, xrange(len(arrays)))

    for ncycle in xrange(100):  # max cycle
Example #34
0
def rescale_with_specified_symm(topdir,
                                dirs,
                                symms,
                                out,
                                sgnum=None,
                                reference_symm=None):
    assert (sgnum, reference_symm).count(None) == 1

    if sgnum is not None:
        sgnum_laue = sgtbx.space_group_info(sgnum).group(
        ).build_derived_reflection_intensity_group(False).type().number()

        matches = filter(
            lambda x: x.reflection_intensity_symmetry(False).space_group_info(
            ).type().number() == sgnum_laue, symms)
        matched_cells = numpy.array(
            map(lambda x: x.unit_cell().parameters(), matches))
        median_cell = map(lambda x: numpy.median(matched_cells[:, x]),
                          xrange(6))

        reference_symm = crystal.symmetry(median_cell, sgnum)
    else:
        sgnum = reference_symm.space_group_info().type().number()
        sgnum_laue = reference_symm.space_group(
        ).build_derived_reflection_intensity_group(False).type().number()

    print >> out
    print >> out, "Re-scaling with specified symmetry:", reference_symm.space_group_info(
    ).symbol_and_number()
    print >> out, " reference cell:", reference_symm.unit_cell()
    print >> out
    print >> out

    cells = {}  # cell and file
    for sym, wd in zip(symms, dirs):
        print >> out, os.path.relpath(wd, topdir),

        # Find appropriate data
        xac_file = util.return_first_found_file(
            ("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
             "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
             "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
            wd=wd)
        if xac_file is None:
            print >> out, "Can't find XDS_ASCII file in %s" % wd
            continue

        xac = XDS_ASCII(xac_file, read_data=False)
        print >> out, "%s %s (%s)" % (
            os.path.basename(xac_file), xac.symm.space_group_info(), ",".join(
                map(lambda x: "%.2f" % x,
                    xac.symm.unit_cell().parameters())))

        if xac.symm.reflection_intensity_symmetry(
                False).space_group_info().type().number() == sgnum_laue:
            if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(),
                                                  0.1, 10):
                print >> out, "  Already scaled with specified symmetry"
                cells[wd] = (numpy.array(xac.symm.unit_cell().parameters()),
                             xac_file)
                continue

        xdsinp = os.path.join(wd, "XDS.INP")
        cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2,
                                              20)

        if len(cosets.combined_cb_ops()) == 0:
            print >> out, "Can't find operator:"
            sym.show_summary(out, " ")
            reference_symm.show_summary(out, " ")
            continue

        newcell = reference_symm.space_group().average_unit_cell(
            xac.symm.change_basis(cosets.combined_cb_ops()[0]).unit_cell())
        newcell = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
        print >> out, "Scaling with transformed cell:", newcell

        #for f in xds_files.generated_by_CORRECT:
        #    util.rotate_file(os.path.join(wd, f))
        bk_prefix = make_backup(xds_files.generated_by_CORRECT,
                                wdir=wd,
                                quiet=True)

        modify_xdsinp(
            xdsinp,
            inp_params=[
                ("JOB", "CORRECT"),
                ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                ("UNIT_CELL_CONSTANTS", newcell),
                ("INCLUDE_RESOLUTION_RANGE", "50 0"),
                ("CORRECTIONS", ""),
                ("NBATCH", "1"),
                ("MINIMUM_I/SIGMA", None),  # use default
                ("REFINE(CORRECT)", None),  # use default
            ])
        run_xds(wd)
        for f in ("XDS.INP", "CORRECT.LP", "XDS_ASCII.HKL", "GXPARM.XDS"):
            if os.path.exists(os.path.join(wd, f)):
                shutil.copyfile(os.path.join(wd, f),
                                os.path.join(wd, f + "_rescale"))

        revert_files(xds_files.generated_by_CORRECT,
                     bk_prefix,
                     wdir=wd,
                     quiet=True)

        new_xac = os.path.join(wd, "XDS_ASCII.HKL_rescale")
        new_gxparm = os.path.join(wd, "GXPARM.XDS_rescale")
        if os.path.isfile(new_xac) and os.path.isfile(new_gxparm):
            cells[wd] = (XPARM(new_gxparm).unit_cell, new_xac)
            print "OK:", cells[wd][0]
        else:
            print >> out, "Error: rescaling failed (Can't find XDS_ASCII.HKL)"
            continue

    return cells, reference_symm
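
A hypothetical call to rescale_with_specified_symm() above; exactly one of sgnum/reference_symm may be given, and symms must hold one crystal.symmetry per entry of dirs (all values illustrative):

import sys
from cctbx import crystal

dirs = ["run_001", "run_002"]
symms = [crystal.symmetry((78.1, 78.1, 36.9, 90, 90, 90), "P 43 21 2"),
         crystal.symmetry((77.9, 78.0, 37.1, 90, 90, 90), "P 41 21 2")]

cells, ref_symm = rescale_with_specified_symm("/path/to/topdir", dirs, symms,
                                              sys.stdout, sgnum=96)
# cells maps each directory to (unit cell parameters, rescaled HKL path)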
Example #35
0
    def run_cycle(self, xds_ascii_files, reference_idx=None):
        if len(xds_ascii_files) == 0:
            print >>self.out, "Error: no files given."
            return

        xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
        xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

        # Get averaged cell for scaling
        sg, cell = self.average_cells(xds_ascii_files)
        
        # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
        inp_out = open(xscale_inp, "w")
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x:len(x))
            refstr = "*" if i==reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr,tmp))
            if len(self.xscale_params.corrections) != 3:
                inp_out.write("  CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections))
            if self.xscale_params.frames_per_batch is not None:
                frame_range = XDS_ASCII(f, read_data=False).get_frame_range()
                nframes = frame_range[1] - frame_range[0]
                nbatch = int(numpy.ceil(float(nframes) / self.xscale_params.frames_per_batch))
                print >>self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (f, frame_range[0], frame_range[1], nbatch)
                inp_out.write("  NBATCH= %d\n" % nbatch)

        inp_out.close()
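
        # For reference, the XSCALE.INP written above comes out roughly as below
        # (values illustrative; self.xscale_inp_head contributes OUTPUT_FILE= and
        # other global keywords, and '*' marks the scaling reference dataset):
        #   MAXIMUM_NUMBER_OF_PROCESSORS= 8
        #   SPACE_GROUP_NUMBER= 96
        #   UNIT_CELL_CONSTANTS= 78.0 78.0 37.0 90.0 90.0 90.0
        #
        #    INPUT_FILE=*run_001/XDS_ASCII.HKL
        #     NBATCH= 12
        #    INPUT_FILE= run_002/XDS_ASCII.HKL
        #     NBATCH= 10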

        print >>self.out, "DEBUG:: running xscale with %3d files.." % len(xds_ascii_files)
        xscale.run_xscale(xscale_inp)
        #util.call(xscale_comm, wdir=self.workdir)

        cbfouts = glob.glob(os.path.join(self.workdir, "*.cbf"))
        if len(cbfouts) > 0:
            # This doesn't affect anything, so I don't want the program to stop if this fails
            try:
                xscalelp.cbf_to_dat(xscale_lp)
                for f in cbfouts: os.remove(f)
            except:
                print >>self.out, traceback.format_exc()

        xscale_log = open(xscale_lp).read()
        if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
            print >>self.out, "DEBUG:: Need to choose files."

            # Since the XDS version of March 1, 2015, it kindly reports which dataset has no common reflections,
            # ..but it does not print the table. Sometimes only one dataset is left. Should we build the table ourselves?
            # Older versions just print the correlation table and stop.
            if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
                G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10)
                #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
                cliques = [c for c in nx.find_cliques(G)]
                cliques.sort(key=lambda x:len(x))
                if self._counter == 1:
                    max_clique = cliques[-1]
                else:
                    idx_prevfile = 1 if self.reference_file else 0
                    max_clique = filter(lambda x: idx_prevfile in x, cliques)[-1] # xscale.hkl must be included!

                if self.reference_file:
                    max_clique = [0,] + filter(lambda x: x!=0, max_clique)

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                try_later = map(lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes()))

                print >>self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (len(max_clique),
                                                                                                      len(try_later))
                print >>self.out, "DEBUG:: %d files are of no use." % (len(xds_ascii_files)-len(G.nodes()))
                for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))):
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

                assert len(try_later) <= 0 # Never the case with newer xscale!! (if it happens, check_remove_list() should be modified to skip_num += 1)
                if len(try_later) > 0:
                    print >>self.out, "Trying to merge %d remaining files.." % len(try_later)
                    next_files = [os.path.join(self.workdir, "xscale.hkl")] + try_later
                    if self.reference_file: next_files = [self.reference_file,] + next_files
                    self.workdir = self.request_next_workdir()
                    self.run_cycle(next_files)
                    return
            else:
                bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
                print >>self.out, "DEBUG:: %d files are of no use." % (len(bad_idxes))

                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))

                # XXX Actually, not all datasets need to be thrown away.. some of them are useful..
                for i in bad_idxes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], 
                                   filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files)))))

            return
        elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
            print >>self.out, "DEBUG:: Need to discard useless data."
            unuseful_data = [xscalelp.get_read_data(xscale_lp)[-1]] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
            if len(unuseful_data) == 0:
                print >>self.out, "I don't know how to fix it.."
                return
            remove_idxes = map(lambda x: x[0]-1, unuseful_data)
            remove_idxes = self.check_remove_list(remove_idxes)
            keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
            for i in remove_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "useless"

            for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
            return
        elif "INACCURATE SCALING FACTORS." in xscale_log:
            # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
            print >>self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
        elif "!!! ERROR !!!" in xscale_log:
            print >>self.out, "Unknown error! please check the XSCALE.LP and fix the program."
            return

        # Re-scale by changing reference
        rescale_for = None
        if len(self.reject_method) == 0:
            rescale_for = self.reference_choice # may be None
        elif reference_idx is None:
            rescale_for = "bmed"
        
        if rescale_for is not None and len(xds_ascii_files) > 1:
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, rescale_for, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling with %s" % rescale_for
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(xds_ascii_files, reference_idx=ref_num)

        if len(self.reject_method) == 0:
            return

        # Remove bad data
        remove_idxes = []
        remove_reasons = {}

        if self.reject_method[0] == "framecc":
            print >>self.out, "Rejections based on frame CC"
            from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

            # list of [frame, n_all, n_common, cc] in the same order
            framecc = xscale_cc_against_merged.run(hklin=os.path.join(self.workdir, "xscale.hkl"),
                                                   output_dir=self.workdir,
                                                   nproc=self.nproc).values()
            if self.reject_params.framecc.method == "tukey":
                ccs = numpy.array(map(lambda x: x[3], reduce(lambda x,y:x+y,framecc)))
                q25, q75 = numpy.percentile(ccs, [25, 75])
                cc_cutoff  = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25)
                print >>self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (cc_cutoff, self.reject_params.framecc.iqr_coeff)
            else:
                cc_cutoff = self.reject_params.framecc.abs_cutoff
                print >>self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

            for i, cclist in enumerate(framecc):
                useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist))
                if len(useframes) == 0:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                f = xds_ascii_files[i]
                xac = XDS_ASCII(f)
                if set(useframes).issuperset(set(range(min(xac.iframe), max(xac.iframe)+1))):
                    continue # All useful frames.

                sel = xac.iframe == useframes[0]
                for x in useframes[1:]: sel |= xac.iframe == x
                if sum(sel) < 10: # XXX should also take I/sigma into account
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                print >>self.out, "Extracting frames %s out of %d-%d in %s" % (",".join(map(str,useframes)),
                                                                               min(xac.iframe), max(xac.iframe),
                                                                               f)

                newf = self.request_file_modify(f)
                xac.write_selected(sel, newf)

            self.reject_method.pop(0) # Perform only once

        elif self.reject_method[0] == "lpstats":
            if "bfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
                Bs = numpy.array(map(lambda x:x[1], xscalelp.get_k_b(xscale_lp)))
                q25, q75 = numpy.percentile(Bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(Bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_B")
                        count += 1

                print >>self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.b" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
                bs = numpy.array(map(lambda x:x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, b in enumerate(bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.b")
                        count += 1

                print >>self.out, " %4d error model b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "em.ab" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
                vals = numpy.array(map(lambda x:x[0]*x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, ab in enumerate(vals):
                    if ab < lowlim or ab > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.ab")
                        count += 1

                print >>self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "rfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >>self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
                rstats = xscalelp.get_rfactors_for_each(xscale_lp)
                vals = numpy.array(map(lambda x:rstats[x][-1][1], rstats)) # Read total R-factor
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc*iqr, q75 + iqrc*iqr
                count = 0
                for i, v in enumerate(vals):
                    if v < lowlim or v > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_R")
                        count += 1

                print >>self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed"% (count, lowlim, highlim)

            if "pairwise_cc" in self.reject_params.lpstats.stats:
                corrs = xscalelp.get_pairwise_correlations(xscale_lp)
                if self.reject_params.lpstats.pwcc.method == "tukey":
                    q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75])
                    iqr = q75 - q25
                    lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                    print >>self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (lowlim, iqr)
                else:
                    lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                    print >>self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

                bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
                idx_bad = {}
                for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                    idx_bad[i] = idx_bad.get(i, 0) + 1
                    idx_bad[j] = idx_bad.get(j, 0) + 1

                idx_bad = idx_bad.items()
                idx_bad.sort(key=lambda x:x[1])
                count = 0
                for idx, badcount in reversed(idx_bad):
                    remove_idxes.append(idx-1)
                    remove_reasons.setdefault(idx-1, []).append("bad_pwcc")
                    bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                    if len(bad_corrs) == 0: break
                    fun_key = lambda x: x[3]
                    print >>self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (idx, 
                                                                                                   min(bad_corrs,key=fun_key)[3],
                                                                                                   max(bad_corrs,key=fun_key)[3],
                                                                                                   len(bad_corrs))
                    count += 1
                print >>self.out, " %4d pairwise CC outliers removed" % count

            self.reject_method.pop(0) # Perform only once
        elif self.reject_method[0] == "delta_cc1/2":
            print >>self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
            table = xscalelp.read_stats_table(xscale_lp)
            i_stat = -1 if self.delta_cchalf_bin == "total" else -2
            prev_cchalf = table["cc_half"][i_stat]
            prev_nuniq = table["nuniq"][i_stat]
            # file_name->idx table
            remaining_files = collections.OrderedDict(map(lambda x: x[::-1], enumerate(xds_ascii_files)))

            # For consistent resolution limit
            inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)
            count = 0
            for i in xrange(len(xds_ascii_files)-1): # if only one file, cannot proceed.
                tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

                cchalf_list = xscale.calc_cchalf_by_removing(wdir=tmpdir, inp_head=inp_head,
                                                             inpfiles=remaining_files.keys(),
                                                             stat_bin=self.delta_cchalf_bin,
                                                             nproc=self.nproc,
                                                             nproc_each=self.nproc_each,
                                                             batchjobs=self.batchjobs)

                rem_idx, cc_i, nuniq_i = cchalf_list[0] # First (largest) is worst one to remove.
                rem_idx_in_org = remaining_files[remaining_files.keys()[rem_idx]]
                
                # Decision making by CC1/2
                print >>self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (i, rem_idx_in_org, 
                                                                                          cc_i, nuniq_i,
                                                                                          prev_cchalf, prev_nuniq)
                if cc_i*nuniq_i <= prev_cchalf*prev_nuniq: break
                print >>self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (rem_idx_in_org, cc_i-prev_cchalf)

                prev_cchalf, prev_nuniq = cc_i, nuniq_i
                remove_idxes.append(rem_idx_in_org)
                remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf")
                del remaining_files[remaining_files.keys()[rem_idx]] # remove file from table
                count += 1

            print >>self.out, " %4d removed by DeltaCC1/2 method" % count

            if self.next_delta_cchalf_bin != []:
                self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            else:
                self.reject_method.pop(0)
        else:
            print >>self.out, "ERROR:: Unsupported reject_method (%s)" % reject_method

        # Remove duplicates
        remove_idxes = list(set(remove_idxes))
        remove_idxes = self.check_remove_list(remove_idxes)
        if len(remove_idxes) > 0:
            print >>self.out, "DEBUG:: Need to remove %d files" % len(remove_idxes)
            for i in sorted(remove_idxes): 
                print >>self.out, " %.3d %s" % (i, xds_ascii_files[i])
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = ",".join(remove_reasons[i])

        # Next run
        keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files)))
        if len(self.reject_method) > 0 or len(remove_idxes) > 0:
            self.workdir = self.request_next_workdir()
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        elif self.reference_choice is not None and len(keep_idxes) > 1:
            # Just re-scale with B reference
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(xscale_lp, self.reference_choice, return_as="index")
            if reference_idx != ref_num:
                print >>self.out, "Rescaling2 with %s" % self.reference_choice
                for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
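Several rejection branches above (framecc with method=tukey, and all of the lpstats statistics) apply the same Tukey-fence rule: compute the interquartile range and flag values outside [Q1 - c*IQR, Q3 + c*IQR]. A minimal standalone sketch of that rule, assuming only numpy; the toy B-factor values are hypothetical:

import numpy

def tukey_fences(values, iqr_coeff=1.5):
    # Lower/upper limits beyond which a value is treated as an outlier,
    # exactly as computed in the lpstats branches above.
    q25, q75 = numpy.percentile(values, [25, 75])
    iqr = q75 - q25
    return q25 - iqr_coeff*iqr, q75 + iqr_coeff*iqr

Bs = [20.1, 22.3, 19.8, 21.0, 55.0]  # hypothetical per-dataset B factors
lowlim, highlim = tukey_fences(Bs)
print "keep %s, reject %s" % ([b for b in Bs if lowlim <= b <= highlim],
                              [b for b in Bs if b < lowlim or b > highlim])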
Example #36
0
def run(params, out=sys.stdout):
    cm = CellGraph(tol_length=params.tol_length, tol_angle=params.tol_angle)

    if not params.xdsdir and params.topdir:
        params.xdsdir = map(
            lambda x: x[0],
            filter(
                lambda x: any(
                    map(lambda y: y.startswith("XDS_ASCII.HKL"), x[2])) or
                "DIALS.HKL" in x[2], os.walk(params.topdir)))

    for i, xdsdir in enumerate(params.xdsdir):
        cm.add_proc_result(i, xdsdir)

    cm.group_xds_results(out)
    ret = cm.grouped_dirs

    if len(ret) == 0:
        return cm

    print >> out
    print >> out, "About the largest group:"
    for idx, wd in enumerate(ret[0]):
        xac_hkl = os.path.join(wd, "XDS_ASCII.HKL")
        correct_lp = os.path.join(wd, "CORRECT.LP")
        print >> out, "%.3d %s" % (idx, os.path.relpath(wd, params.topdir)
                                   if params.topdir is not None else wd),
        if not os.path.isfile(xac_hkl):
            print >> out, "Unsuccessful"
            continue

        sg = XDS_ASCII(xac_hkl, read_data=False).symm.space_group_info()
        clp = correctlp.CorrectLp(correct_lp)
        if "all" in clp.table:
            cmpl = clp.table["all"]["cmpl"][-1]
        else:
            cmpl = float("nan")
        ISa = clp.a_b_ISa[-1]
        print >> out, "%10s ISa=%5.2f Cmpl=%5.1f " % (sg, ISa, cmpl)

    if params.do_pointless:
        worker = pointless.Pointless()
        files = map(lambda x: os.path.join(x, "INTEGRATE.HKL"), ret[0])
        #print files
        files = filter(lambda x: os.path.isfile(x), files)

        print >> out, "\nRunning pointless for the largest member."
        result = worker.run_for_symm(xdsin=files,
                                     logout="pointless.log",
                                     tolerance=10,
                                     d_min=5)
        if "symm" in result:
            print >> out, " pointless suggested", result[
                "symm"].space_group_info()

    if 0:  # disabled debug plotting; requires networkx (imported as nx) and a graph G
        import pylab
        pos = nx.spring_layout(G)
        #pos = nx.spectral_layout(G)
        #pos = nx.circular_layout(G)

        #nx.draw_networkx_nodes(G, pos, node_size = 100, nodelist=others, node_color = 'w')
        nx.draw_networkx_nodes(G, pos, node_size=100, node_color='w')
        nx.draw_networkx_edges(G, pos, width=1)
        nx.draw_networkx_labels(G,
                                pos,
                                font_size=12,
                                font_family='sans-serif',
                                font_color='r')

        pylab.xticks([])
        pylab.yticks([])
        pylab.savefig("network.png")
        pylab.show()

    return cm
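The os.walk filter at the top of run() is compact but dense. A sketch of the same directory discovery unrolled into a loop, under the assumption that a processing directory is identified by containing a file named XDS_ASCII.HKL* or DIALS.HKL:

import os

def find_proc_dirs(topdir):
    # Keep any directory that contains an XDS_ASCII.HKL* or DIALS.HKL file,
    # mirroring the map/filter expression used to fill params.xdsdir.
    found = []
    for root, dirnames, filenames in os.walk(topdir):
        if any(f.startswith("XDS_ASCII.HKL") for f in filenames) or "DIALS.HKL" in filenames:
            found.append(root)
    return found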
Example #37
0
def run(params):
    if os.path.isdir(params.workdir) and os.listdir(params.workdir):
        print "Directory already exists and not empty:", params.workdir
        return

    if params.reference_file is not None and params.program != "xscale":
        print "WARNING - reference file is not used unless program=xscale."

    if not os.path.isdir(params.workdir):
        os.makedirs(params.workdir)

    if params.batch.engine == "sge":
        batchjobs = batchjob.SGE(pe_name=params.batch.sge_pe_name)
    elif params.batch.engine == "sh":
        batchjobs = batchjob.ExecLocal()
    else:
        raise "Unknown batch engine: %s" % params.batch.engine

    out = multi_out()
    out.register("log", open(os.path.join(params.workdir, "multi_merge.log"), "w"), atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >>out, "Paramters:"
    libtbx.phil.parse(master_params_str).format(params).show(out=out, prefix=" ")
    print >>out, ""

    # XXX Does not work when clustering is used..
    html_report = multi_merging.html_report.HtmlReportMulti(os.path.abspath(params.workdir))
    try: html_report.add_params(params, master_params_str)
    except: print >>out, traceback.format_exc()

    xds_ascii_files = map(lambda x: x[:(x.index("#") if "#" in x else None)].strip(), open(params.lstin))
    xds_ascii_files = filter(lambda x: x!="" and os.path.isfile(x), xds_ascii_files)
    xds_ascii_files = map(lambda x: os.path.abspath(x), xds_ascii_files)

    cells = collections.OrderedDict()
    laues = {} # for check
    for xac in xds_ascii_files:
        try:
            symm = XDS_ASCII(xac, read_data=False).symm
        except:
            print >>out, "Error in reading %s" % xac
            print >>out, traceback.format_exc()
            return
        cells[xac] = symm.unit_cell().parameters()
        laue = symm.space_group().build_derived_reflection_intensity_group(False).info()
        laues.setdefault(str(laue),{}).setdefault(symm.space_group_info().type().number(), []).append(xac)

    if len(laues) > 1:
        print >>out, "ERROR! more than one space group included."
        for laue in laues:
            print "Laue symmetry", laue
            for sg in laues[laue]:
                print >>out, " SPACE_GROUP_NUMBER= %d (%d data)" % (sg, len(laues[laue][sg]))
                for f in laues[laue][sg]: print >>out, "  %s" % f
                print >>out, ""
        return
            
    try: html_report.add_cells_and_files(cells, laues.keys()[0])
    except: print >>out, traceback.format_exc()

    data_for_merge = []
    if params.clustering == "blend":
        if params.blend.use_old_result is None:
            blend_wdir = os.path.join(params.workdir, "blend")
            os.mkdir(blend_wdir)
            blend.run_blend0R(blend_wdir, xds_ascii_files)
            print >>out, "\nRunning BLEND with analysis mode"
        else:
            blend_wdir = params.blend.use_old_result
            print >>out, "\nUsing precalculated BLEND result in %s" % params.blend.use_old_result

        blend_clusters = blend.BlendClusters(workdir=blend_wdir, d_min=params.d_min)
        summary_out = os.path.join(blend_wdir, "blend_cluster_summary.dat")
        clusters = blend_clusters.show_cluster_summary(out=open(summary_out, "w"))
        print >>out, "Clusters found by BLEND were summarized in %s" % summary_out

        if params.blend.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.blend.min_cmpl, clusters)
        if params.blend.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.blend.min_acmpl, clusters)            
        if params.blend.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.blend.min_redun, clusters)
        if params.blend.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.blend.min_aredun, clusters)            
        if params.blend.max_LCV is not None:
            clusters = filter(lambda x: x[7] <= params.blend.max_LCV, clusters)
        if params.blend.max_aLCV is not None:
            clusters = filter(lambda x: x[8] <= params.blend.max_aLCV, clusters)

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun, LCV, aLCV in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f LCV= %5.1f aLCV=%5.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun, LCV, aLCV)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: blend_clusters.files[x-1], IDs),
                                   LCV, aLCV,clh))
        print >>out
        try: html_report.add_clutering_result(clusters, "blend")
        except: print >>out, traceback.format_exc()

    elif params.clustering == "cc":
        ccc_wdir = os.path.join(params.workdir, "cc_clustering")
        os.mkdir(ccc_wdir)
        cc_clusters = cc_clustering.CCClustering(ccc_wdir, xds_ascii_files,
                                                 d_min=params.cc_clustering.d_min,
                                                 min_ios=params.cc_clustering.min_ios)
        print >>out, "\nRunning CC-based clustering"

        cc_clusters.do_clustering(nproc=params.cc_clustering.nproc,
                                  b_scale=params.cc_clustering.b_scale,
                                  use_normalized=params.cc_clustering.use_normalized,
                                  html_maker=html_report)
        summary_out = os.path.join(ccc_wdir, "cc_cluster_summary.dat")
        clusters = cc_clusters.show_cluster_summary(d_min=params.d_min, out=open(summary_out, "w"))
        print >>out, "Clusters were summarized in %s" % summary_out

        if params.cc_clustering.min_cmpl is not None:
            clusters = filter(lambda x: x[3] >= params.cc_clustering.min_cmpl, clusters)
        if params.cc_clustering.min_acmpl is not None:
            clusters = filter(lambda x: x[5] >= params.cc_clustering.min_acmpl, clusters)            
        if params.cc_clustering.min_redun is not None:
            clusters = filter(lambda x: x[4] >= params.cc_clustering.min_redun, clusters)
        if params.cc_clustering.min_aredun is not None:
            clusters = filter(lambda x: x[6] >= params.cc_clustering.min_aredun, clusters)            
        if params.cc_clustering.max_clheight is not None:
            clusters = filter(lambda x: x[2] <= params.cc_clustering.max_clheight, clusters)

        print >>out, "With specified conditions, following %d clusters will be merged:" % len(clusters)
        for clno, IDs, clh, cmpl, redun, acmpl, aredun in clusters: # process largest first
            print >>out, " Cluster_%.4d NumDS= %4d CLh= %5.1f Cmpl= %6.2f Redun= %4.1f ACmpl=%6.2f ARedun=%4.1f" % (clno, len(IDs), clh, cmpl, redun, acmpl, aredun)
            data_for_merge.append((os.path.join(params.workdir, "cluster_%.4d"%clno),
                                   map(lambda x: xds_ascii_files[x-1], IDs),
                                   float("nan"),float("nan"),clh))
        print >>out

        try: html_report.add_clutering_result(clusters, "cc_clustering")
        except: print >>out, traceback.format_exc()
        
    else:
        data_for_merge.append((os.path.join(params.workdir, "all_data"),
                               xds_ascii_files, float("nan"), float("nan"), 0))

    ofs_summary = open(os.path.join(params.workdir, "cluster_summary.dat"), "w")
    ofs_summary.write("# d_min= %.3f A\n" % (params.d_min if params.d_min is not None else float("nan")))
    ofs_summary.write("# LCV and aLCV are values of all data\n")
    ofs_summary.write("     cluster  ClH   LCV aLCV run ds.all ds.used  Cmpl Redun I/sigI Rmeas CC1/2 Cmpl.ou Red.ou I/sig.ou Rmeas.ou CC1/2.ou Cmpl.in Red.in I/sig.in Rmeas.in CC1/2.in SigAno.in CCano.in WilsonB Aniso  \n")

    out.flush()

    def write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats):
        tmps = "%12s %5.2f %4.1f %4.1f %3d %6d %7d %5.1f %5.1f %6.2f %5.1f %5.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %7.1f %6.1f % 8.2f % 8.1f %8.1f %9.1f %8.1f %7.2f %7.1e\n"
        ofs_summary.write(tmps % (os.path.relpath(workdir, params.workdir), clh, LCV, aLCV, cycle,
                                  len(xds_files), num_files,
                                  stats["cmpl"][0],
                                  stats["redundancy"][0],
                                  stats["i_over_sigma"][0],
                                  stats["r_meas"][0],
                                  stats["cc_half"][0],
                                  stats["cmpl"][2],
                                  stats["redundancy"][2],
                                  stats["i_over_sigma"][2],
                                  stats["r_meas"][2],
                                  stats["cc_half"][2],
                                  stats["cmpl"][1],
                                  stats["redundancy"][1],
                                  stats["i_over_sigma"][1],
                                  stats["r_meas"][1],
                                  stats["cc_half"][1],
                                  stats["sig_ano"][1],
                                  stats["cc_ano"][1],
                                  stats["xtriage_log"].wilson_b,
                                  stats["xtriage_log"].anisotropy,
                                  ))
        ofs_summary.flush()
    # write_ofs_summary()

    if "merging" in params.batch.par_run:
        params.nproc = params.batch.nproc_each
        jobs = []
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            if not os.path.exists(workdir): os.makedirs(workdir)
            shname = "merge_%s.sh" % os.path.relpath(workdir, params.workdir)
            pickle.dump((params, os.path.abspath(workdir), xds_files, cells, batchjobs), open(os.path.join(workdir, "args.pkl"), "w"), -1)
            job = batchjob.Job(workdir, shname, nproc=params.batch.nproc_each)
            job.write_script("""\
"%s" -c '\
import pickle; \
from yamtbx.dataproc.auto.command_line.multi_merge import merge_datasets; \
args = pickle.load(open("args.pkl")); \
ret = merge_datasets(*args); \
pickle.dump(ret, open("result.pkl","w")); \
'
""" % sys.executable)
            batchjobs.submit(job)
            jobs.append(job)

        batchjobs.wait_all(jobs)
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            try:
                results = pickle.load(open(os.path.join(workdir, "result.pkl")))
            except:
                print >>out, "Error in unpickling result in %s" % workdir
                print >>out, traceback.format_exc()
                results = []

            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))
            for cycle, wd, num_files, stats in results:
                write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()
    else:
        for workdir, xds_files, LCV, aLCV, clh in data_for_merge:
            print >>out, "Merging %s..." % os.path.relpath(workdir, params.workdir)
            out.flush()
            results = merge_datasets(params, workdir, xds_files, cells, batchjobs)
            
            if len(results) == 0:
                ofs_summary.write("#%s failed\n" % os.path.relpath(workdir, params.workdir))

            for cycle, wd, num_files, stats in results:
                write_ofs_summary(workdir, cycle, clh, LCV, aLCV, xds_files, num_files, stats)

            try: html_report.add_merge_result(workdir, clh, LCV, aLCV, xds_files, results[-1][2], results[-1][3])
            except: print >>out, traceback.format_exc()

    try: html_report.write_html()
    except: print >>out, traceback.format_exc()

    print "firefox %s" % os.path.join(html_report.root, "report.html")
    return
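The three map/filter lines that build xds_ascii_files implement a small list-file reader: strip trailing '#' comments, drop blanks and missing files, and make paths absolute. An equivalent sketch as a plain loop:

import os

def read_lst(lstin):
    # '#' starts a comment; blank lines and non-existent paths are skipped.
    files = []
    for line in open(lstin):
        path = line[:line.index("#")] if "#" in line else line
        path = path.strip()
        if path and os.path.isfile(path):
            files.append(os.path.abspath(path))
    return files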
Example #38
0
def rescale_with_specified_symm_worker(sym_wd_wdr,
                                       topdir,
                                       log_out,
                                       reference_symm,
                                       sgnum,
                                       sgnum_laue,
                                       prep_dials_files=False):
    # XXX Unsafe if multiple processes run this function for the same target directory at the same time

    sym, wd, wdr = sym_wd_wdr
    out = StringIO()
    print >> out, os.path.relpath(wd, topdir),

    # Find appropriate data # XXX does not work for DIALS data!!
    xac_file = util.return_first_found_file(
        ("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
         "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
         "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
        wd=wd)
    if xac_file is None:
        print >> out, "Can't find XDS_ASCII file in %s" % wd
        log_out.write(out.getvalue())
        log_out.flush()
        return (wd, None)

    xac = XDS_ASCII(xac_file, read_data=False)
    print >> out, "%s %s (%s)" % (os.path.basename(xac_file),
                                  xac.symm.space_group_info(), ",".join(
                                      map(lambda x: "%.2f" % x,
                                          xac.symm.unit_cell().parameters())))

    if xac.symm.reflection_intensity_symmetry(
            False).space_group_info().type().number() == sgnum_laue:
        if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(), 0.1,
                                              10):
            print >> out, "  Already scaled with specified symmetry"
            log_out.write(out.getvalue())
            log_out.flush()

            if wd != wdr: shutil.copy2(xac_file, wdr)

            if prep_dials_files: prepare_dials_files(wd, out, moveto=wdr)
            return (wdr, (numpy.array(xac.symm.unit_cell().parameters()),
                          os.path.join(wdr, os.path.basename(xac_file))))

    xdsinp = os.path.join(wd, "XDS.INP")
    cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2, 20)

    if len(cosets.combined_cb_ops()) == 0:
        print >> out, "Can't find operator:"
        sym.show_summary(out, " ")
        reference_symm.show_summary(out, " ")
        log_out.write(out.getvalue())
        log_out.flush()
        return (wdr, None)

    newcell = reference_symm.space_group().average_unit_cell(
        xac.symm.change_basis(cosets.combined_cb_ops()[0]).unit_cell())
    newcell = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
    print >> out, "Scaling with transformed cell:", newcell

    #for f in xds_files.generated_by_CORRECT:
    #    util.rotate_file(os.path.join(wd, f))
    bk_prefix = make_backup(xds_files.generated_by_CORRECT,
                            wdir=wd,
                            quiet=True)

    modify_xdsinp(
        xdsinp,
        inp_params=[
            ("JOB", "CORRECT"),
            ("SPACE_GROUP_NUMBER", "%d" % sgnum),
            ("UNIT_CELL_CONSTANTS", newcell),
            ("INCLUDE_RESOLUTION_RANGE", "50 0"),
            ("CORRECTIONS", ""),
            ("NBATCH", "1"),
            ("MINIMUM_I/SIGMA", None),  # use default
            ("REFINE(CORRECT)", None),  # use default
        ])
    run_xds(wd)
    for f in ("XDS.INP", "CORRECT.LP", "XDS_ASCII.HKL", "GXPARM.XDS"):
        if os.path.exists(os.path.join(wd, f)):
            shutil.copyfile(os.path.join(wd, f),
                            os.path.join(wdr, f + "_rescale"))

    revert_files(xds_files.generated_by_CORRECT,
                 bk_prefix,
                 wdir=wd,
                 quiet=True)

    new_xac = os.path.join(wdr, "XDS_ASCII.HKL_rescale")

    if prep_dials_files:
        prepare_dials_files(wd,
                            out,
                            space_group=reference_symm.space_group(),
                            reindex_op=cosets.combined_cb_ops()[0],
                            moveto=wdr)

    ret = None
    if os.path.isfile(new_xac):
        ret = (XDS_ASCII(new_xac,
                         read_data=False).symm.unit_cell().parameters(),
               new_xac)
        print >> out, " OK:", ret[0]
    else:
        print >> out, "Error: rescaling failed (Can't find XDS_ASCII.HKL)"

    return (wd, ret)
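The early exit in the worker accepts data that are already in the target setting when the reflection-intensity (Laue) group matches and the cell agrees within a 10% relative tolerance on lengths and 10 degrees on angles. A toy illustration of that cell test with cctbx's uctbx; the cell values are hypothetical:

from cctbx import uctbx

cell_ref = uctbx.unit_cell((78.0, 78.0, 37.0, 90, 90, 90))  # hypothetical
cell_new = uctbx.unit_cell((78.5, 78.4, 37.2, 90, 90, 90))  # hypothetical
# Same tolerances as in the worker: 0.1 relative on lengths, 10 deg on angles.
print cell_new.is_similar_to(cell_ref, 0.1, 10)  # True for this pair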
Example #39
0
def xds_sequence(root, params):
    print
    print os.path.relpath(root, params.topdir)

    init_lp = os.path.join(root, "INIT.LP")
    xparm = os.path.join(root, "XPARM.XDS")
    gxparm = os.path.join(root, "GXPARM.XDS")
    defpix_lp = os.path.join(root, "DEFPIX.LP")
    correct_lp = os.path.join(root, "CORRECT.LP")
    integrate_hkl = os.path.join(root, "INTEGRATE.HKL")
    xac_hkl = os.path.join(root, "XDS_ASCII.HKL")
    integrate_lp = os.path.join(root, "INTEGRATE.LP")
    spot_xds = os.path.join(root, "SPOT.XDS")
    xdsinp = os.path.join(root, "XDS.INP")

    assert os.path.isfile(xdsinp)
    if params.cell_prior.force: assert params.cell_prior.check

    xdsinp_dict = dict(get_xdsinp_keyword(xdsinp))

    if params.cell_prior.sgnum > 0:
        xs_prior = crystal.symmetry(params.cell_prior.cell,
                                    params.cell_prior.sgnum)
    else:
        xs_prior = None

    decilog = multi_out()
    decilog.register("log",
                     open(os.path.join(root, "decision.log"), "a"),
                     atexit_send_to=None)
    try:
        print >> decilog, "xds_sequence started at %s in %s\n" % (
            time.strftime("%Y-%m-%d %H:%M:%S"), root)

        if not kamo_test_installation.tst_xds():
            print >> decilog, "XDS is not installed or expired!!"
            return

        if params.show_progress:
            decilog.register("stdout", sys.stdout)

        if params.mode == "initial" and params.resume and os.path.isfile(
                correct_lp):
            print >> decilog, " Already processed."
            return

        if params.mode == "recycle" and not os.path.isfile(gxparm):
            print >> decilog, "GXPARM.XDS not found. Cannot do recycle."
            return

        if params.fast_delphi and (params.nproc is None or params.nproc > 1):
            delphi = optimal_delphi_by_nproc(xdsinp=xdsinp, nproc=params.nproc)
            print >> decilog, " Setting delphi to ", delphi
            modify_xdsinp(xdsinp, inp_params=[
                ("DELPHI", str(delphi)),
            ])

        if params.nproc is not None and params.nproc > 1:
            modify_xdsinp(xdsinp,
                          inp_params=[
                              ("MAXIMUM_NUMBER_OF_PROCESSORS",
                               str(params.nproc)),
                          ])

        if params.mode == "initial":
            modify_xdsinp(xdsinp, inp_params=[("JOB", "XYCORR INIT")])
            run_xds(wdir=root, show_progress=params.show_progress)
            initlp = InitLp(init_lp)
            first_bad = initlp.check_bad_first_frames()
            if first_bad:
                print >> decilog, " first frames look bad (too weak) exposure:", first_bad
                new_data_range = map(
                    int,
                    dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split())
                new_data_range[0] = first_bad[-1] + 1
                print >> decilog, " changing DATA_RANGE= to", new_data_range
                modify_xdsinp(xdsinp,
                              inp_params=[("JOB", "INIT"),
                                          ("DATA_RANGE",
                                           "%d %d" % tuple(new_data_range))])
                for f in xds_files.generated_by_INIT:
                    util.rotate_file(os.path.join(root, f), copy=False)
                run_xds(wdir=root, show_progress=params.show_progress)

            # Peak search
            modify_xdsinp(xdsinp, inp_params=[("JOB", "COLSPOT")])
            run_xds(wdir=root, show_progress=params.show_progress)
            if params.auto_frame_exclude_spot_based:
                sx = idxreflp.SpotXds(spot_xds)
                sx.set_xdsinp(xdsinp)
                spots = filter(lambda x: 5 < x[-1] < 30,
                               sx.collected_spots())  # keep low-resolution spots only (5-30 A)
                frame_numbers = numpy.array(map(lambda x: int(x[2]) + 1,
                                                spots))
                data_range = map(
                    int,
                    dict(get_xdsinp_keyword(xdsinp))["DATA_RANGE"].split())
                # XXX this assumes SPOT_RANGE equals to DATA_RANGE. Is this guaranteed?
                h = numpy.histogram(frame_numbers,
                                    bins=numpy.arange(data_range[0],
                                                      data_range[1] + 2,
                                                      step=1))
                q14 = numpy.percentile(h[0], [25, 75])
                iqr = q14[1] - q14[0]
                cutoff = max(
                    h[0][h[0] <= iqr * 1.5 + q14[1]]) / 5  # magic number
                print >> decilog, "DEBUG:: IQR= %.2f, Q1/4= %s, cutoff= %.2f" % (
                    iqr, q14, cutoff)
                cut_frames = h[1][h[0] < cutoff]
                keep_frames = h[1][h[0] >= cutoff]
                print >> decilog, "DEBUG:: keep_frames=", keep_frames
                print >> decilog, "DEBUG::  cut_frames=", cut_frames

                if len(cut_frames) > 0:
                    cut_ranges = [
                        [cut_frames[0], cut_frames[0]],
                    ]
                    for fn in cut_frames:
                        if fn - cut_ranges[-1][1] <= 1: cut_ranges[-1][1] = fn
                        else: cut_ranges.append([fn, fn])

                    # Edit XDS.INP
                    cut_inp_str = "".join(
                        map(
                            lambda x: "EXCLUDE_DATA_RANGE= %6d %6d\n" % tuple(
                                x), cut_ranges))
                    open(xdsinp, "a").write("\n" + cut_inp_str)

                    # Edit SPOT.XDS
                    shutil.copyfile(spot_xds, spot_xds + ".org")
                    sx.write(open(spot_xds, "w"),
                             frame_selection=set(keep_frames))

            # Indexing
            if params.cell_prior.method == "use_first":
                modify_xdsinp(xdsinp,
                              inp_params=[
                                  ("JOB", "IDXREF"),
                                  ("UNIT_CELL_CONSTANTS", " ".join(
                                      map(lambda x: "%.3f" % x,
                                          params.cell_prior.cell))),
                                  ("SPACE_GROUP_NUMBER",
                                   "%d" % params.cell_prior.sgnum),
                              ])
            else:
                modify_xdsinp(xdsinp, inp_params=[("JOB", "IDXREF")])

            run_xds(wdir=root, show_progress=params.show_progress)
            print >> decilog, ""  # TODO indexing stats like indexed percentage here.

            if params.tryhard:
                try_indexing_hard(root,
                                  params.show_progress,
                                  decilog,
                                  known_sgnum=params.cell_prior.sgnum,
                                  known_cell=params.cell_prior.cell,
                                  tol_length=params.cell_prior.tol_length,
                                  tol_angle=params.cell_prior.tol_angle)

            if not os.path.isfile(xparm):
                print >> decilog, " Indexing failed."
                return

            if params.cell_prior.sgnum > 0:
                # Check anyway
                xsxds = XPARM(xparm).crystal_symmetry()
                cosets = reindex.reindexing_operators(
                    xs_prior, xsxds, params.cell_prior.tol_length,
                    params.cell_prior.tol_angle)
                if cosets.double_cosets is None:
                    if params.cell_prior.check:
                        print >> decilog, " Incompatible cell. Indexing failed."
                        return
                    else:
                        print >> decilog, " Warning: Incompatible cell."

                elif params.cell_prior.method == "symm_constraint_only":
                    cell = xsxds.unit_cell().change_basis(
                        cosets.combined_cb_ops()[0])
                    print >> decilog, " Trying symmetry-constrained cell parameter:", cell
                    modify_xdsinp(xdsinp,
                                  inp_params=[
                                      ("JOB", "IDXREF"),
                                      ("UNIT_CELL_CONSTANTS", " ".join(
                                          map(lambda x: "%.3f" % x,
                                              cell.parameters()))),
                                      ("SPACE_GROUP_NUMBER",
                                       "%d" % params.cell_prior.sgnum),
                                  ])
                    for f in xds_files.generated_by_IDXREF:
                        util.rotate_file(os.path.join(root, f),
                                         copy=(f == "SPOT.XDS"))

                    run_xds(wdir=root, show_progress=params.show_progress)

                    if not os.path.isfile(xparm):
                        print >> decilog, " Indexing failed."
                        return

                    # Check again
                    xsxds = XPARM(xparm).crystal_symmetry()
                    if not xsxds.unit_cell().is_similar_to(
                            xs_prior.unit_cell(), params.cell_prior.tol_length,
                            params.cell_prior.tol_angle):
                        print >> decilog, "  Resulted in different cell. Indexing failed."
                        return

        elif params.mode == "recycle":
            print >> decilog, " Start recycle. original ISa= %.2f" % correctlp.get_ISa(
                correct_lp, check_valid=True)
            for f in xds_files.generated_after_DEFPIX + ("XPARM.XDS",
                                                         "plot_integrate.log"):
                util.rotate_file(os.path.join(root, f), copy=True)
            shutil.copyfile(gxparm + ".1", xparm)
        else:
            raise "Unknown mode (%s)" % params.mode

        # To Integration
        modify_xdsinp(xdsinp,
                      inp_params=[("JOB", "DEFPIX INTEGRATE"),
                                  ("INCLUDE_RESOLUTION_RANGE", "50 0")])
        run_xds(wdir=root, show_progress=params.show_progress)
        if os.path.isfile(integrate_lp):
            xds_plot_integrate.run(integrate_lp,
                                   os.path.join(root, "plot_integrate.log"))
        if not os.path.isfile(integrate_hkl):
            print >> decilog, " Integration failed."
            return

        # Make _noscale.HKL if needed
        if params.no_scaling:
            bk_prefix = make_backup(("XDS.INP", ), wdir=root, quiet=True)
            xparm_obj = XPARM(xparm)
            modify_xdsinp(xdsinp,
                          inp_params=[
                              ("JOB", "CORRECT"),
                              ("CORRECTIONS", ""),
                              ("NBATCH", "1"),
                              ("MINIMUM_I/SIGMA", "50"),
                              ("REFINE(CORRECT)", ""),
                              ("UNIT_CELL_CONSTANTS", " ".join(
                                  map(lambda x: "%.3f" % x,
                                      xparm_obj.unit_cell))),
                              ("SPACE_GROUP_NUMBER",
                               "%d" % xparm_obj.spacegroup),
                          ])
            print >> decilog, " running CORRECT without empirical scaling"
            run_xds(wdir=root, show_progress=params.show_progress)
            for f in xds_files.generated_by_CORRECT + ("XDS.INP", ):
                ff = os.path.join(root, f)
                if not os.path.isfile(ff): continue
                if ff.endswith(".cbf"):
                    os.remove(ff)
                else:
                    os.rename(ff, ff + "_noscale")

            revert_files(("XDS.INP", ), bk_prefix, wdir=root, quiet=True)

        # Run pointless
        pointless_integrate = {}
        if params.use_pointless:
            worker = Pointless()
            pointless_integrate = worker.run_for_symm(
                xdsin=integrate_hkl,
                logout=os.path.join(root, "pointless_integrate.log"))
            if "symm" in pointless_integrate:
                symm = pointless_integrate["symm"]
                print >> decilog, " pointless using INTEGRATE.HKL suggested", symm.space_group_info(
                )
                if xs_prior:
                    if xtal.is_same_space_group_ignoring_enantiomorph(
                            symm.space_group(), xs_prior.space_group()):
                        print >> decilog, " which is consistent with given symmetry."
                    elif xtal.is_same_laue_symmetry(symm.space_group(),
                                                    xs_prior.space_group()):
                        print >> decilog, " which has consistent Laue symmetry with given symmetry."
                    else:
                        print >> decilog, " which is inconsistent with given symmetry."

                sgnum = symm.space_group_info().type().number()
                cell = " ".join(
                    map(lambda x: "%.2f" % x,
                        symm.unit_cell().parameters()))
                modify_xdsinp(xdsinp,
                              inp_params=[("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                          ("UNIT_CELL_CONSTANTS", cell)])
            else:
                print >> decilog, " pointless failed."

        flag_do_not_change_symm = False

        if xs_prior and params.cell_prior.force:
            modify_xdsinp(xdsinp,
                          inp_params=[("UNIT_CELL_CONSTANTS", " ".join(
                              map(lambda x: "%.3f" % x,
                                  params.cell_prior.cell))),
                                      ("SPACE_GROUP_NUMBER",
                                       "%d" % params.cell_prior.sgnum)])
            flag_do_not_change_symm = True
        elif params.cell_prior.method == "correct_only":
            xsxds = XPARM(xparm).crystal_symmetry()
            cosets = reindex.reindexing_operators(xs_prior, xsxds,
                                                  params.cell_prior.tol_length,
                                                  params.cell_prior.tol_angle)
            if cosets.double_cosets is not None:
                cell = xsxds.unit_cell().change_basis(
                    cosets.combined_cb_ops()[0])
                print >> decilog, " Using given symmetry in CORRECT with symmetry constraints:", cell
                modify_xdsinp(xdsinp,
                              inp_params=[
                                  ("UNIT_CELL_CONSTANTS", " ".join(
                                      map(lambda x: "%.3f" % x,
                                          cell.parameters()))),
                                  ("SPACE_GROUP_NUMBER",
                                   "%d" % params.cell_prior.sgnum),
                              ])
                flag_do_not_change_symm = True
            else:
                print >> decilog, " Tried to use given symmetry in CORRECT, but cell in integration is incompatible."

        # Do Scaling
        modify_xdsinp(xdsinp, inp_params=[
            ("JOB", "CORRECT"),
        ])

        run_xds(wdir=root, show_progress=params.show_progress)

        if not os.path.isfile(xac_hkl):
            print >> decilog, " CORRECT failed."
            return

        if not os.path.isfile(gxparm):
            print >> decilog, " Refinement in CORRECT failed."

        print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(
            correct_lp, check_valid=True)

        ret = calc_merging_stats(xac_hkl)
        if params.cut_resolution:
            if ret is not None and ret[0] is not None:
                d_min = ret[0]
                modify_xdsinp(xdsinp,
                              inp_params=[("JOB", "CORRECT"),
                                          ("INCLUDE_RESOLUTION_RANGE",
                                           "50 %.2f" % d_min)])
                print >> decilog, " Re-scale at %.2f A" % d_min
                os.rename(os.path.join(root, "CORRECT.LP"),
                          os.path.join(root, "CORRECT_fullres.LP"))
                os.rename(xac_hkl, os.path.join(root, "XDS_ASCII_fullres.HKL"))
                run_xds(wdir=root, show_progress=params.show_progress)
                print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(
                    correct_lp, check_valid=True)
                print >> decilog, " (Original files are saved as *_fullres.*)"
            else:
                print >> decilog, "error: Can't decide resolution."

        last_ISa = correctlp.get_ISa(correct_lp, check_valid=True)

        # Run pointless and (if result is different from INTEGRATE) re-scale.
        if params.use_pointless:
            worker = Pointless()
            pointless_correct = worker.run_for_symm(
                xdsin=xac_hkl,
                logout=os.path.join(root, "pointless_correct.log"))
            pointless_best_symm = None

            if "symm" in pointless_correct:
                symm = pointless_correct["symm"]
                need_rescale = False

                if pointless_integrate.get("symm"):
                    symm_by_integrate = pointless_integrate["symm"]

                    if not xtal.is_same_laue_symmetry(
                            symm_by_integrate.space_group(),
                            symm.space_group()):
                        print >> decilog, "pointless suggested %s, which is different Laue symmetry from INTEGRATE.HKL (%s)" % (
                            symm.space_group_info(),
                            symm_by_integrate.space_group_info())
                        prob_integrate = pointless_integrate.get(
                            "laue_prob", float("nan"))
                        prob_correct = pointless_correct.get(
                            "laue_prob", float("nan"))

                        print >> decilog, " Prob(%s |INTEGRATE), Prob(%s |CORRECT) = %.4f, %.4f." % (
                            symm_by_integrate.space_group_info(),
                            symm.space_group_info(), prob_integrate,
                            prob_correct)
                        if prob_correct > prob_integrate:
                            need_rescale = True
                            pointless_best_symm = symm
                        else:
                            pointless_best_symm = symm_by_integrate
                else:
                    need_rescale = True
                    pointless_best_symm = symm
                    print >> decilog, "pointless using XDS_ASCII.HKL suggested %s" % symm.space_group_info(
                    )
                    if xs_prior:
                        if xtal.is_same_space_group_ignoring_enantiomorph(
                                symm.space_group(), xs_prior.space_group()):
                            print >> decilog, " which is consistent with given symmetry."
                        elif xtal.is_same_laue_symmetry(
                                symm.space_group(), xs_prior.space_group()):
                            print >> decilog, " which has consistent Laue symmetry with given symmetry."
                        else:
                            print >> decilog, " which is inconsistent with given symmetry."

                if need_rescale and not flag_do_not_change_symm:
                    sgnum = symm.space_group_info().type().number()
                    cell = " ".join(
                        map(lambda x: "%.2f" % x,
                            symm.unit_cell().parameters()))
                    modify_xdsinp(xdsinp,
                                  inp_params=[
                                      ("JOB", "CORRECT"),
                                      ("SPACE_GROUP_NUMBER", "%d" % sgnum),
                                      ("UNIT_CELL_CONSTANTS", cell),
                                      ("INCLUDE_RESOLUTION_RANGE", "50 0")
                                  ])

                    run_xds(wdir=root, show_progress=params.show_progress)

                    ret = calc_merging_stats(xac_hkl)

                    if params.cut_resolution:
                        if ret is not None and ret[0] is not None:
                            d_min = ret[0]
                            modify_xdsinp(xdsinp,
                                          inp_params=[
                                              ("JOB", "CORRECT"),
                                              ("INCLUDE_RESOLUTION_RANGE",
                                               "50 %.2f" % d_min)
                                          ])
                            print >> decilog, " Re-scale at %.2f A" % d_min
                            os.rename(os.path.join(root, "CORRECT.LP"),
                                      os.path.join(root, "CORRECT_fullres.LP"))
                            os.rename(
                                xac_hkl,
                                os.path.join(root, "XDS_ASCII_fullres.HKL"))
                            run_xds(wdir=root,
                                    show_progress=params.show_progress)
                            print >> decilog, " OK. ISa= %.2f" % correctlp.get_ISa(
                                correct_lp, check_valid=True)
                            print >> decilog, " (Original files are saved as *_fullres.*)"
                        else:
                            print >> decilog, "error: Can't decide resolution."
                            for f in ("CORRECT_fullres.LP",
                                      "XDS_ASCII_fullres.HKL"):
                                if os.path.isfile(os.path.join(root, f)):
                                    print >> decilog, "removing", f
                                    os.remove(os.path.join(root, f))

                    ISa = correctlp.get_ISa(correct_lp, check_valid=True)

                    if ISa >= last_ISa or last_ISa != last_ISa:  # if improved or last_ISa is nan
                        print >> decilog, "ISa improved= %.2f" % ISa
                    else:
                        print >> decilog, "ISa got worse= %.2f" % ISa

            if pointless_best_symm:
                xac_symm = XDS_ASCII(xac_hkl, read_data=False).symm
                if not xtal.is_same_space_group_ignoring_enantiomorph(
                        xac_symm.space_group(),
                        pointless_best_symm.space_group()):
                    if xtal.is_same_laue_symmetry(
                            xac_symm.space_group(),
                            pointless_best_symm.space_group()):
                        tmp = "same Laue symmetry"
                    else:
                        tmp = "different Laue symmetry"
                    print >> decilog, "WARNING: symmetry in scaling is different from Pointless result (%s)." % tmp

        run_xdsstat(wdir=root)
        print
        if params.make_report: html_report.make_individual_report(root, root)
    except:
        print >> decilog, traceback.format_exc()
    finally:
        print >> decilog, "\nxds_sequence finished at %s" % time.strftime(
            "%Y-%m-%d %H:%M:%S")
        decilog.close()
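Inside the auto_frame_exclude_spot_based branch, individual bad frame numbers are merged into contiguous ranges before being written out as EXCLUDE_DATA_RANGE= lines. The grouping logic, extracted as a standalone sketch:

def frames_to_ranges(cut_frames):
    # Merge sorted frame numbers into contiguous [first, last] ranges,
    # as done above before appending EXCLUDE_DATA_RANGE= to XDS.INP.
    ranges = []
    for fn in sorted(cut_frames):
        if ranges and fn - ranges[-1][1] <= 1:
            ranges[-1][1] = fn
        else:
            ranges.append([fn, fn])
    return ranges

print frames_to_ranges([3, 4, 5, 9, 10, 20])  # [[3, 5], [9, 10], [20, 20]]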
Example #40
0
def reindex_with_specified_symm(topdir, reference_symm, dirs, out):
    print >> out
    print >> out, "Re-index to specified symmetry:"
    reference_symm.show_summary(out, "  ")
    print >> out
    print >> out

    cells = {}  # cell and file

    sgnum_laue = reference_symm.space_group(
    ).build_derived_reflection_intensity_group(False).type().number()

    for wd in dirs:
        print >> out, "%s:" % os.path.relpath(wd, topdir),

        # Find appropriate data
        xac_file = util.return_first_found_file(
            ("XDS_ASCII.HKL_noscale.org", "XDS_ASCII.HKL_noscale",
             "XDS_ASCII_fullres.HKL.org", "XDS_ASCII_fullres.HKL",
             "XDS_ASCII.HKL.org", "XDS_ASCII.HKL"),
            wd=wd)
        if xac_file is None:
            print >> out, "Can't find XDS_ASCII file in %s" % wd
            continue

        if xac_file.endswith(".org"):
            xac_file_org, xac_file = xac_file, xac_file[:-4]
        else:
            xac_file_org = xac_file + ".org"

        if not os.path.isfile(xac_file_org):
            os.rename(xac_file, xac_file_org)

        xac = XDS_ASCII(xac_file_org, read_data=False)
        print >> out, "%s %s (%s)" % (
            os.path.basename(xac_file), xac.symm.space_group_info(), ",".join(
                map(lambda x: "%.2f" % x,
                    xac.symm.unit_cell().parameters())))

        if xac.symm.reflection_intensity_symmetry(
                False).space_group_info().type().number() == sgnum_laue:
            if xac.symm.unit_cell().is_similar_to(reference_symm.unit_cell(),
                                                  0.1, 10):
                print >> out, "  Already scaled with specified symmetry"
                os.rename(xac_file_org, xac_file)  # rename back
                cells[wd] = (numpy.array(xac.symm.unit_cell().parameters()),
                             xac_file)
                continue

        cosets = reindex.reindexing_operators(reference_symm, xac.symm, 0.2,
                                              20)

        if len(cosets.combined_cb_ops()) == 0:
            print >> out, "Can't find operator:"
            xac.symm.show_summary(out, " ")
            reference_symm.show_summary(out, " ")
            continue

        newcell = xac.write_reindexed(op=cosets.combined_cb_ops()[0],
                                      space_group=reference_symm.space_group(),
                                      hklout=xac_file)
        cells[wd] = (numpy.array(newcell.parameters()), xac_file)

        newcell = " ".join(map(lambda x: "%.3f" % x, newcell.parameters()))
        print >> out, "  Reindexed to transformed cell: %s with %s" % (
            newcell, cosets.combined_cb_ops()[0].as_hkl())

    return cells
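Both this function and the rescaling worker above pick their input with util.return_first_found_file over a priority-ordered tuple of candidate names. A plain-Python equivalent, assuming the helper simply returns the first candidate that exists in wd (or None):

import os

def return_first_found_file(names, wd):
    # Assumed behaviour of util.return_first_found_file: the first
    # candidate (in the given priority order) that exists wins.
    for name in names:
        path = os.path.join(wd, name)
        if os.path.isfile(path):
            return path
    return None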
Example #41
0
def run(lstin, params):
    xac_files = read_path_list(lstin)

    common0 = len(os.path.commonprefix(xac_files))

    arrays = []

    for f in xac_files:
        xac = XDS_ASCII(f, i_only=True)
        xac.remove_rejected()
        a = xac.i_obs().resolution_filter(d_min=params.d_min, d_max=params.d_max)
        a = a.merge_equivalents(use_internal_variance=False).array()
        a = a.select(a.data() / a.sigmas() >= params.min_ios)
        arrays.append(a)

    # Prep
    args = []
    for i in xrange(len(arrays) - 1):
        for j in xrange(i + 1, len(arrays)):
            args.append((i, j))

    # Calc all CC
    worker = lambda x: calc_cc(arrays[x[0]], arrays[x[1]])
    results = easy_mp.pool_map(fixed_func=worker, args=args, processes=params.nproc)

    # Make matrix
    mat = numpy.zeros(shape=(len(arrays), len(arrays)))
    for (i, j), (cc, nref) in zip(args, results):
        print j, i, cc
        mat[j, i] = cc

    open("%s.names" % params.prefix, "w").write("\n".join(map(lambda x: os.path.dirname(x[common0:]), xac_files)))
    open("%s.matrix" % params.prefix, "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

    ofs = open("%s.dat" % params.prefix, "w")
    ofs.write("i j cc nref\n")
    for (i, j), (cc, nref) in zip(args, results):
        ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

    open("%s_ana.R" % params.prefix, "w").write(
        """\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%s.matrix")
md<-matrix(1-cc, ncol=%d, byrow=TRUE)
labs<-read.table("%s.names")
filenames<-read.table("%s")$V1
rownames(md)<-labs$V1
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()

hc$labels <- 1:nrow(md)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- paste(sprintf("     %%03d           %%3d         %%7.3f\\n",
                i,length(groups[[i]]),hc$height[i]),sep="")
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
 write.table(filenames[sorted_groups], sprintf("cluster%%.3d.lst",i), quote=FALSE, row.names=FALSE, col.names=FALSE)
}

q(save="yes")
"""
        % (params.prefix, len(arrays), params.prefix, lstin)
    )
    print "R --vanilla < %s_ana.R" % params.prefix