Example #1
    def run(self):
        from mmtbx.scaling import absolute_scaling
        iso_scale_and_b_obs = absolute_scaling.ml_iso_absolute_scaling(miller_array=self.obs,
                                                                       n_residues=100)
        iso_scale_and_b_calc = absolute_scaling.ml_iso_absolute_scaling(miller_array=self.calc,
                                                                       n_residues=100)
        b_obs = iso_scale_and_b_obs.b_wilson
        b_calc = iso_scale_and_b_calc.b_wilson

        B = -2*(b_calc - b_obs)
        k = kBdecider2.get_linear_scale(self.obs, self.calc, B)
        print "k,B=",k,B
        return k, B
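A minimal, self-contained sketch of the basic call pattern used throughout these examples (a hedged illustration: the file name and the choice of the first Miller array are placeholders, not taken from the example above):

# Hedged sketch: estimate an overall scale and Wilson B for an observed data set.
from iotbx import reflection_file_reader
from mmtbx.scaling import absolute_scaling

f_obs = reflection_file_reader.any_reflection_file("data.mtz").as_miller_arrays()[0]
iso = absolute_scaling.ml_iso_absolute_scaling(miller_array=f_obs, n_residues=100)
print(iso.p_scale)   # ML estimate of -log of the scale factor
print(iso.b_wilson)  # ML estimate of the overall Wilson B (A**2)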
Example #2
  def ml_normalisation(self, aniso=False):
    # estimate number of residues per unit cell
    mr = matthews.matthews_rupp(self.intensities.crystal_symmetry())
    n_residues = mr.n_residues

    # estimate B-factor and scale factors for normalisation
    if aniso:
      normalisation = absolute_scaling.ml_aniso_absolute_scaling(
        self.intensities, n_residues=n_residues)
      u_star = normalisation.u_star
    else:
      normalisation = absolute_scaling.ml_iso_absolute_scaling(
        self.intensities, n_residues=n_residues)
      u_star = adptbx.b_as_u(
        adptbx.u_iso_as_u_star(
          self.intensities.unit_cell(), normalisation.b_wilson))

    # apply scales
    self.intensities = self.intensities.customized_copy(
      data=scaling.ml_normalise_aniso(
        self.intensities.indices(), self.intensities.data(),
        normalisation.p_scale, self.intensities.unit_cell(),
        u_star),
      sigmas=scaling.ml_normalise_aniso(
        self.intensities.indices(), self.intensities.sigmas(),
        normalisation.p_scale, self.intensities.unit_cell(),
        u_star)).set_info(self.intensities.info())

    # record output in log file
    s = StringIO()
    mr.show(out=s)
    normalisation.show(out=s)
    logger.info(s.getvalue())
Example #3
def wilson_scaling(F, n_residues, n_bases):
    from mmtbx.scaling import absolute_scaling
    iso_scale = absolute_scaling.ml_iso_absolute_scaling(miller_array=F,
                                                         n_residues=n_residues,
                                                         n_bases=n_bases)
    aniso_scale = absolute_scaling.ml_aniso_absolute_scaling(
        miller_array=F, n_residues=n_residues, n_bases=n_bases)
    return iso_scale, aniso_scale
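A hypothetical use of the helper above (f_obs and the residue/base counts are assumptions for illustration):

iso_scale, aniso_scale = wilson_scaling(F=f_obs, n_residues=250, n_bases=0)
print(iso_scale.b_wilson)    # isotropic Wilson B estimate
print(aniso_scale.b_cart)    # anisotropic B tensor (Cartesian, 6 components)
print(aniso_scale.p_scale)   # -log of the overall scale factor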
Example #4
    def _ml_normalisation(intensities, aniso):
        # estimate number of residues per unit cell
        mr = matthews.matthews_rupp(intensities.crystal_symmetry())
        n_residues = mr.n_residues

        # estimate B-factor and scale factors for normalisation
        if aniso:
            normalisation = absolute_scaling.ml_aniso_absolute_scaling(
                intensities, n_residues=n_residues
            )
            u_star = normalisation.u_star
        else:
            normalisation = absolute_scaling.ml_iso_absolute_scaling(
                intensities, n_residues=n_residues
            )
            u_star = adptbx.b_as_u(
                adptbx.u_iso_as_u_star(intensities.unit_cell(), normalisation.b_wilson)
            )

        # record output in log file
        if aniso:
            b_cart = normalisation.b_cart
            logger.info("ML estimate of overall B_cart value:")
            logger.info(
                """\
  %5.2f, %5.2f, %5.2f
  %12.2f, %5.2f
  %19.2f"""
                % (b_cart[0], b_cart[3], b_cart[4], b_cart[1], b_cart[5], b_cart[2])
            )
        else:
            logger.info("ML estimate of overall B value:")
            logger.info("   %5.2f A**2" % normalisation.b_wilson)
        logger.info("ML estimate of  -log of scale factor:")
        logger.info("  %5.2f" % (normalisation.p_scale))

        s = StringIO()
        mr.show(out=s)
        normalisation.show(out=s)
        logger.debug(s.getvalue())

        # apply scales
        return intensities.customized_copy(
            data=scaling.ml_normalise_aniso(
                intensities.indices(),
                intensities.data(),
                normalisation.p_scale,
                intensities.unit_cell(),
                u_star,
            ),
            sigmas=scaling.ml_normalise_aniso(
                intensities.indices(),
                intensities.sigmas(),
                normalisation.p_scale,
                intensities.unit_cell(),
                u_star,
            ),
        )
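For reference, the u_star tensor passed to scaling.ml_normalise_aniso above and the b_cart values written to the log are related through cctbx.adptbx; a hedged sketch, assuming intensities is a Miller array and aniso_scaling is an ml_aniso_absolute_scaling result:

from cctbx import adptbx

u_star = aniso_scaling.u_star                                    # reciprocal-space ADP tensor
u_cart = adptbx.u_star_as_u_cart(intensities.unit_cell(), u_star)
b_cart = adptbx.u_as_b(u_cart)                                   # should match aniso_scaling.b_cart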
Example #5
def set_chunk_stats(chunk, stats, stat_choice, n_residues=None, ref_cell=None, space_group=None, d_min=None, ref_data=None):
    if "reslimit" in stat_choice: stats["reslimit"].append(chunk.res_lim)
    else: stats["reslimit"].append(float("nan"))

    if "pr" in stat_choice: stats["pr"].append(chunk.profile_radius)
    else: stats["pr"].append(float("nan"))

    stats["ccref"].append(float("nan"))

    if set(["ioversigma","resnatsnr1","ccref"]).intersection(stat_choice):
        iobs = chunk.data_array(space_group, False)
        iobs = iobs.select(iobs.sigmas()>0).merge_equivalents(use_internal_variance=False).array()
        binner = iobs.setup_binner(auto_binning=True)

        if "resnatsnr1" in stat_choice:
            res = float("nan")
            for i_bin in binner.range_used():
                sel = binner.selection(i_bin)
                tmp = iobs.select(sel)
                if tmp.size() == 0: continue
                sn = flex.mean(tmp.data()/tmp.sigmas())
                if sn <= 1:
                    res = binner.bin_d_range(i_bin)[1]
                    break

            stats["resnatsnr1"].append(res)
        else:
            stats["resnatsnr1"].append(float("nan"))

        if d_min: iobs = iobs.resolution_filter(d_min=d_min)

        if "ccref" in stat_choice:
            corr = iobs.correlation(ref_data, assert_is_similar_symmetry=False)
            if corr.is_well_defined(): stats["ccref"][-1] = corr.coefficient()

        if "ioversigma" in stat_choice:
            stats["ioversigma"].append(flex.mean(iobs.data()/iobs.sigmas()))
        else:
            stats["ioversigma"].append(float("nan"))

    else:
        stats["ioversigma"].append(float("nan"))
        stats["resnatsnr1"].append(float("nan"))

    if "abdist" in stat_choice:
        from cctbx.uctbx.determine_unit_cell import NCDist
        G6a, G6b = make_G6(ref_cell), make_G6(chunk.cell)
        abdist = NCDist(G6a, G6b)
        stats["abdist"].append(abdist)
    else:
        stats["abdist"].append(float("nan"))
    
    if "wilsonb" in stat_choice:
        iso_scale_and_b = ml_iso_absolute_scaling(iobs, n_residues, 0)
        stats["wilsonb"].append(iso_scale_and_b.b_wilson)
    else:
        stats["wilsonb"].append(float("nan"))
Example #6
def wilson_scaling (F, n_residues, n_bases) :
  from mmtbx.scaling import absolute_scaling
  iso_scale = absolute_scaling.ml_iso_absolute_scaling(
    miller_array=F,
    n_residues=n_residues,
    n_bases=n_bases)
  aniso_scale = absolute_scaling.ml_aniso_absolute_scaling(
    miller_array=F,
    n_residues=n_residues,
    n_bases=n_bases)
  return iso_scale, aniso_scale
Example #7
def test_scaling_on_random_data(B_add):
    miller_array = random_data(B_add, n_residues=100.0)
    scale_object_iso = absolute_scaling.ml_iso_absolute_scaling(
        miller_array, n_residues=100.0)

    ## compare the results please
    assert approx_equal(B_add, scale_object_iso.b_wilson, eps=5)

    scale_object_aniso = absolute_scaling.ml_aniso_absolute_scaling(
        miller_array, n_residues=100.0)

    assert approx_equal(B_add, scale_object_aniso.b_cart[0], eps=5)
    assert approx_equal(B_add, scale_object_aniso.b_cart[1], eps=5)
    assert approx_equal(B_add, scale_object_aniso.b_cart[2], eps=5)
Example #8
def test_scaling_on_random_data(B_add):
  miller_array = random_data(B_add,n_residues=100.0)
  scale_object_iso = absolute_scaling.ml_iso_absolute_scaling(
    miller_array,
    n_residues=100.0)

  ## compare the results please
  assert approx_equal(B_add, scale_object_iso.b_wilson, eps=5)

  scale_object_aniso = absolute_scaling.ml_aniso_absolute_scaling(
    miller_array,
    n_residues=100.0)

  assert approx_equal(B_add, scale_object_aniso.b_cart[0], eps=5)
  assert approx_equal(B_add, scale_object_aniso.b_cart[1], eps=5)
  assert approx_equal(B_add, scale_object_aniso.b_cart[2], eps=5)
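The random_data() helper used by the two tests above is not shown; a hedged sketch of what such a helper could look like (not the original implementation):

# Amplitudes from a random structure, smeared with an extra isotropic B so that
# b_wilson should recover roughly B_add (assuming the generated scatterers
# themselves carry negligible ADPs).
from cctbx.development import random_structure
from cctbx import sgtbx
from scitbx.array_family import flex

def random_data(B_add, n_residues=100.0):
    xrs = random_structure.xray_structure(
        space_group_info=sgtbx.space_group_info("P 21 21 21"),
        elements=["C"] * int(n_residues * 7),  # ~7 non-H atoms per residue (assumption)
        volume_per_atom=50.0)
    f = xrs.structure_factors(d_min=2.0).f_calc().amplitudes()
    dw = flex.exp(-B_add * f.unit_cell().d_star_sq(f.indices()) / 4.0)
    return f.customized_copy(data=f.data() * dw)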
Example #9
def set_chunk_stats(chunk,
                    stats,
                    stat_choice,
                    n_residues=None,
                    ref_cell=None,
                    space_group=None,
                    d_min=None,
                    ref_data=None):
    if "reslimit" in stat_choice: stats["reslimit"].append(chunk.res_lim)
    else: stats["reslimit"].append(float("nan"))

    if "pr" in stat_choice: stats["pr"].append(chunk.profile_radius)
    else: stats["pr"].append(float("nan"))

    stats["ccref"].append(float("nan"))

    if set(["ioversigma", "resnatsnr1", "ccref"]).intersection(stat_choice):
        iobs = chunk.data_array(space_group, False)
        iobs = iobs.select(iobs.sigmas() > 0).merge_equivalents(
            use_internal_variance=False).array()
        binner = iobs.setup_binner(auto_binning=True)

        if "resnatsnr1" in stat_choice:
            res = float("nan")
            for i_bin in binner.range_used():
                sel = binner.selection(i_bin)
                tmp = iobs.select(sel)
                if tmp.size() == 0: continue
                sn = flex.mean(tmp.data() / tmp.sigmas())
                if sn <= 1:
                    res = binner.bin_d_range(i_bin)[1]
                    break

            stats["resnatsnr1"].append(res)
        else:
            stats["resnatsnr1"].append(float("nan"))

        if d_min: iobs = iobs.resolution_filter(d_min=d_min)

        if "ccref" in stat_choice:
            corr = iobs.correlation(ref_data, assert_is_similar_symmetry=False)
            if corr.is_well_defined(): stats["ccref"][-1] = corr.coefficient()

        if "ioversigma" in stat_choice:
            stats["ioversigma"].append(flex.mean(iobs.data() / iobs.sigmas()))
        else:
            stats["ioversigma"].append(float("nan"))

    else:
        stats["ioversigma"].append(float("nan"))
        stats["resnatsnr1"].append(float("nan"))

    if "abdist" in stat_choice:
        from cctbx.uctbx.determine_unit_cell import NCDist
        G6a, G6b = make_G6(ref_cell), make_G6(chunk.cell)
        abdist = NCDist(G6a, G6b)
        stats["abdist"].append(abdist)
    else:
        stats["abdist"].append(float("nan"))

    if "wilsonb" in stat_choice:
        iso_scale_and_b = ml_iso_absolute_scaling(iobs, n_residues, 0)
        stats["wilsonb"].append(iso_scale_and_b.b_wilson)
    else:
        stats["wilsonb"].append(float("nan"))
Example #10
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(
                    data=arr.data() / normaliser.normalizer_for_miller_array,
                    sigmas=arr.sigmas() /
                    normaliser.normalizer_for_miller_array)
        # Prep
        args = []
        for i in xrange(len(self.arrays) - 1):
            for j in xrange(i + 1, len(self.arrays)):
                args.append((i, j))

        # Calc all CC
        if self.use_sfdist:
            worker = lambda x: calc_sfdist(self.arrays.values()[x[0]],
                                           self.arrays.values()[x[1]])
        else:
            worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                                       self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i, j), (cc, nref) in zip(args, results):
            cc_data_for_html.append((i, j, cc, nref))
            if cc == cc: continue
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i, j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x: x[1])
        remove_idxes = set()

        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes,
                           xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst" % prefix, "w").write("\n".join(
                map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i, j), (cc, nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc

        open("%s.matrix" % prefix,
             "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

        ofs = open("%s.dat" % prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i, j), (cc, nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

        open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

        call(cmd="Rscript",
             arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int, ids)]
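Distilled from the use_normalized branch above, a minimal sketch of kernel normalisation of a single Miller array (arr is a placeholder); kernel_normalisation also exposes a normalised_miller attribute, as used in Example #14:

from mmtbx.scaling.absolute_scaling import kernel_normalisation

normaliser = kernel_normalisation(arr, auto_kernel=True)
e_like = arr.customized_copy(
    data=arr.data() / normaliser.normalizer_for_miller_array,
    sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)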
Example #11
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      cluster_method="ward",
                      distance_eqn="sqrt(1-cc)",
                      min_common_refs=3,
                      html_maker=None):
        """
        Using correlation as distance metric (for hierarchical clustering)
        https://stats.stackexchange.com/questions/165194/using-correlation-as-distance-metric-for-hierarchical-clustering

        Correlation "Distances" and Hierarchical Clustering
        http://research.stowers.org/mcm/efg/R/Visualization/cor-cluster/index.htm
        """

        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        distance_eqns = {
            "sqrt(1-cc)": lambda x: numpy.sqrt(1. - x),
            "1-cc": lambda x: 1. - x,
            "sqrt(1-cc^2)": lambda x: numpy.sqrt(1. - x**2),
        }
        cc_to_distance = distance_eqns[distance_eqn]  # raises KeyError for an unknown option
        assert cluster_method in ("single", "complete", "average", "weighted",
                                  "centroid", "median", "ward"
                                  )  # available methods in scipy

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            failed = {}
            for f in self.arrays:
                arr = self.arrays[f]
                try:
                    normaliser = kernel_normalisation(arr, auto_kernel=True)
                    self.arrays[f] = arr.customized_copy(
                        data=arr.data() /
                        normaliser.normalizer_for_miller_array,
                        sigmas=arr.sigmas() /
                        normaliser.normalizer_for_miller_array)
                except Exception, e:
                    failed.setdefault(e.message, []).append(f)

            if failed:
                msg = ""
                for r in failed:
                    msg += " %s\n%s\n" % (r, "\n".join(
                        map(lambda x: "  %s" % x, failed[r])))
                raise Sorry(
                    "intensity normalization failed by following reason(s):\n%s"
                    % msg)
Example #12
    def __init__(self,
                 miller_array,
                 phil_object,
                 out=None,
                 out_plot=None,
                 miller_calc=None,
                 original_intensities=None,
                 completeness_as_non_anomalous=None,
                 verbose=0):
        if out is None:
            out = sys.stdout
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Matthews coefficient and Solvent content statistics"
        n_copies_solc = 1.0
        self.nres_known = False
        if (phil_object.scaling.input.asu_contents.n_residues is not None
                or phil_object.scaling.input.asu_contents.n_bases is not None):
            self.nres_known = True
            if (phil_object.scaling.input.asu_contents.sequence_file
                    is not None):
                print >> out, "  warning: ignoring sequence file"
        elif (phil_object.scaling.input.asu_contents.sequence_file
              is not None):
            print >> out, "  determining composition from sequence file %s" % \
              phil_object.scaling.input.asu_contents.sequence_file
            seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
                file_name=phil_object.scaling.input.asu_contents.sequence_file,
                log=out)
            if (seq_comp is not None):
                phil_object.scaling.input.asu_contents.n_residues = seq_comp.n_residues
                phil_object.scaling.input.asu_contents.n_bases = seq_comp.n_bases
                self.nres_known = True
        matthews_results = matthews.matthews_rupp(
            crystal_symmetry=miller_array,
            n_residues=phil_object.scaling.input.asu_contents.n_residues,
            n_bases=phil_object.scaling.input.asu_contents.n_bases,
            out=out,
            verbose=1)
        phil_object.scaling.input.asu_contents.n_residues = matthews_results[0]
        phil_object.scaling.input.asu_contents.n_bases = matthews_results[1]
        n_copies_solc = matthews_results[2]
        self.matthews_results = matthews_results

        if phil_object.scaling.input.asu_contents.n_copies_per_asu is not None:
            n_copies_solc = phil_object.scaling.input.asu_contents.n_copies_per_asu
            self.defined_copies = n_copies_solc
            if verbose > 0:
                print >> out, "Number of copies per asymmetric unit provided"
                print >> out, " Will use user specified value of ", n_copies_solc
        else:
            phil_object.scaling.input.asu_contents.n_copies_per_asu = n_copies_solc
            self.guessed_copies = n_copies_solc

        # first report on I over sigma
        miller_array_new = miller_array
        self.data_strength = None
        miller_array_intensities = miller_array
        if (original_intensities is not None):
            assert original_intensities.is_xray_intensity_array()
            miller_array_intensities = original_intensities
        if miller_array_intensities.sigmas() is not None:
            data_strength = data_statistics.i_sigi_completeness_stats(
                miller_array_intensities,
                isigi_cut=phil_object.scaling.input.parameters.
                misc_twin_parameters.twin_test_cuts.isigi_cut,
                completeness_cut=phil_object.scaling.input.parameters.
                misc_twin_parameters.twin_test_cuts.completeness_cut,
                completeness_as_non_anomalous=completeness_as_non_anomalous)
            data_strength.show(out)
            self.data_strength = data_strength
            if phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution is None:
                if data_strength.resolution_cut > data_strength.resolution_at_least:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_at_least
                else:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_cut

        ## Isotropic wilson scaling
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Maximum likelihood isotropic Wilson scaling "

        n_residues = phil_object.scaling.input.asu_contents.n_residues
        n_bases = phil_object.scaling.input.asu_contents.n_bases
        if n_residues is None:
            n_residues = 0
        if n_bases is None:
            n_bases = 0
        if n_bases + n_residues == 0:
            raise Sorry("No scatterers available")
        iso_scale_and_b = absolute_scaling.ml_iso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() *
            n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() *
            n_copies_solc)
        iso_scale_and_b.show(out=out, verbose=verbose)
        self.iso_scale_and_b = iso_scale_and_b
        ## Store the b and scale values from isotropic ML scaling
        self.iso_p_scale = iso_scale_and_b.p_scale
        self.iso_b_wilson = iso_scale_and_b.b_wilson

        ## Anisotropic ml wilson scaling
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Maximum likelihood anisotropic Wilson scaling "
        aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() *
            n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() *
            n_copies_solc)
        aniso_scale_and_b.show(out=out, verbose=1)

        self.aniso_scale_and_b = aniso_scale_and_b

        try:
            b_cart = aniso_scale_and_b.b_cart
        except AttributeError, e:
            print >> out, "*** ERROR ***"
            print >> out, str(e)
            show_exception_info_if_full_testing()
            return
Example #13
    def do_clustering(self, nproc=1, b_scale=False, use_normalized=False, html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor 
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling
            
            ofs_wilson = open("%s_wilson_scales.dat"%prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0: # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices())/4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data()*tmp,
                                                         sigmas=arr.sigmas()*tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(data=arr.data()/normaliser.normalizer_for_miller_array,
                                                     sigmas=arr.sigmas()/normaliser.normalizer_for_miller_array)
        # Prep 
        args = []
        for i in xrange(len(self.arrays)-1):
            for j in xrange(i+1, len(self.arrays)):
                args.append((i,j))
           
        # Calc all CC
        worker = lambda x: calc_cc(self.arrays.values()[x[0]], self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i,j), (cc,nref) in zip(args, results):
            cc_data_for_html.append((i,j,cc,nref))
            if cc==cc: continue
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i,j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x:x[1])
        remove_idxes = set()
        
        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst"%prefix, "w").write("\n".join(map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i,j), (cc,nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc
            
        open("%s.matrix"%prefix, "w").write(" ".join(map(lambda x:"%.4f"%x, mat.flatten())))

        ofs = open("%s.dat"%prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i,j), (cc,nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i,j,cc,nref))

        open("%s_ana.R"%prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d"%(x+1), org2now.keys()))))

        call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int,ids)]
Example #14
    def __init__(self,
                 miller_array,
                 phil_object,
                 out=None,
                 out_plot=None,
                 miller_calc=None,
                 original_intensities=None,
                 completeness_as_non_anomalous=None,
                 verbose=0):
        if out is None:
            out = sys.stdout
        if verbose > 0:
            print(file=out)
            print(file=out)
            print("Matthews coefficient and Solvent content statistics",
                  file=out)
        n_copies_solc = 1.0
        self.nres_known = False
        if (phil_object.scaling.input.asu_contents.n_residues is not None
                or phil_object.scaling.input.asu_contents.n_bases is not None):
            self.nres_known = True
            if (phil_object.scaling.input.asu_contents.sequence_file
                    is not None):
                print("  warning: ignoring sequence file", file=out)
        elif (phil_object.scaling.input.asu_contents.sequence_file
              is not None):
            print("  determining composition from sequence file %s" % \
              phil_object.scaling.input.asu_contents.sequence_file, file=out)
            seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
                file_name=phil_object.scaling.input.asu_contents.sequence_file,
                log=out)
            if (seq_comp is not None):
                phil_object.scaling.input.asu_contents.n_residues = seq_comp.n_residues
                phil_object.scaling.input.asu_contents.n_bases = seq_comp.n_bases
                self.nres_known = True
        matthews_results = matthews.matthews_rupp(
            crystal_symmetry=miller_array,
            n_residues=phil_object.scaling.input.asu_contents.n_residues,
            n_bases=phil_object.scaling.input.asu_contents.n_bases,
            out=out,
            verbose=1)
        phil_object.scaling.input.asu_contents.n_residues = matthews_results[0]
        phil_object.scaling.input.asu_contents.n_bases = matthews_results[1]
        n_copies_solc = matthews_results[2]
        self.matthews_results = matthews_results

        if phil_object.scaling.input.asu_contents.n_copies_per_asu is not None:
            n_copies_solc = phil_object.scaling.input.asu_contents.n_copies_per_asu
            self.defined_copies = n_copies_solc
            if verbose > 0:
                print("Number of copies per asymmetric unit provided",
                      file=out)
                print(" Will use user specified value of ",
                      n_copies_solc,
                      file=out)
        else:
            phil_object.scaling.input.asu_contents.n_copies_per_asu = n_copies_solc
            self.guessed_copies = n_copies_solc

        # first report on I over sigma
        miller_array_new = miller_array
        self.data_strength = None
        miller_array_intensities = miller_array
        if (original_intensities is not None):
            assert original_intensities.is_xray_intensity_array()
            miller_array_intensities = original_intensities
        if miller_array_intensities.sigmas() is not None:
            data_strength = data_statistics.i_sigi_completeness_stats(
                miller_array_intensities,
                isigi_cut=phil_object.scaling.input.parameters.
                misc_twin_parameters.twin_test_cuts.isigi_cut,
                completeness_cut=phil_object.scaling.input.parameters.
                misc_twin_parameters.twin_test_cuts.completeness_cut,
                completeness_as_non_anomalous=completeness_as_non_anomalous)
            data_strength.show(out)
            self.data_strength = data_strength
            if phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution is None:
                if data_strength.resolution_cut > data_strength.resolution_at_least:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_at_least
                else:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_cut

        ## Isotropic wilson scaling
        if verbose > 0:
            print(file=out)
            print(file=out)
            print("Maximum likelihood isotropic Wilson scaling ", file=out)

        n_residues = phil_object.scaling.input.asu_contents.n_residues
        n_bases = phil_object.scaling.input.asu_contents.n_bases
        if n_residues is None:
            n_residues = 0
        if n_bases is None:
            n_bases = 0
        if n_bases + n_residues == 0:
            raise Sorry("No scatterers available")
        iso_scale_and_b = absolute_scaling.ml_iso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() *
            n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() *
            n_copies_solc)
        iso_scale_and_b.show(out=out, verbose=verbose)
        self.iso_scale_and_b = iso_scale_and_b
        ## Store the b and scale values from isotropic ML scaling
        self.iso_p_scale = iso_scale_and_b.p_scale
        self.iso_b_wilson = iso_scale_and_b.b_wilson

        ## Anisotropic ml wilson scaling
        if verbose > 0:
            print(file=out)
            print(file=out)
            print("Maximum likelihood anisotropic Wilson scaling ", file=out)
        aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() *
            n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() *
            n_copies_solc)
        aniso_scale_and_b.show(out=out, verbose=1)

        self.aniso_scale_and_b = aniso_scale_and_b

        try:
            b_cart = aniso_scale_and_b.b_cart
        except AttributeError as e:
            print("*** ERROR ***", file=out)
            print(str(e), file=out)
            show_exception_info_if_full_testing()
            return

        self.aniso_p_scale = aniso_scale_and_b.p_scale
        self.aniso_u_star = aniso_scale_and_b.u_star
        self.aniso_b_cart = aniso_scale_and_b.b_cart
        # XXX: for GUI
        self.overall_b_cart = getattr(aniso_scale_and_b, "overall_b_cart",
                                      None)

        ## Correcting for anisotropy
        if verbose > 0:
            print("Correcting for anisotropy in the data", file=out)
            print(file=out)

        b_cart_observed = aniso_scale_and_b.b_cart

        b_trace_average = (b_cart_observed[0] + b_cart_observed[1] +
                           b_cart_observed[2]) / 3.0
        b_trace_min = b_cart_observed[0]
        if b_cart_observed[1] < b_trace_min: b_trace_min = b_cart_observed[1]
        if b_cart_observed[2] < b_trace_min: b_trace_min = b_cart_observed[2]

        if phil_object.scaling.input.optional.aniso.final_b == "eigen_min":
            b_use = aniso_scale_and_b.eigen_values[2]
        elif phil_object.scaling.input.optional.aniso.final_b == "eigen_mean":
            b_use = flex.mean(aniso_scale_and_b.eigen_values)
        elif phil_object.scaling.input.optional.aniso.final_b == "user_b_iso":
            assert phil_object.scaling.input.optional.aniso.b_iso is not None
            b_use = phil_object.scaling.input.optional.aniso.b_iso
        else:
            b_use = 30

        b_cart_aniso_removed = [-b_use, -b_use, -b_use, 0, 0, 0]
        u_star_aniso_removed = adptbx.u_cart_as_u_star(
            miller_array.unit_cell(), adptbx.b_as_u(b_cart_aniso_removed))
        ## I do this in two steps, but it can easily be done in one step;
        ## kept separate here just for clarity.
        self.no_aniso_array = absolute_scaling.anisotropic_correction(
            miller_array_new, 0.0, aniso_scale_and_b.u_star)
        self.no_aniso_array = absolute_scaling.anisotropic_correction(
            self.no_aniso_array, 0.0, u_star_aniso_removed)
        self.no_aniso_array = self.no_aniso_array.set_observation_type(
            miller_array)

        ## Make normalised structure factors please

        sel_big = self.no_aniso_array.data() > 1.e+50
        self.no_aniso_array = self.no_aniso_array.array(
            data=self.no_aniso_array.data().set_selected(sel_big, 0))
        self.no_aniso_array = self.no_aniso_array.set_observation_type(
            miller_array)

        normalisation = absolute_scaling.kernel_normalisation(
            self.no_aniso_array, auto_kernel=True)
        self.normalised_miller = normalisation.normalised_miller.deep_copy()

        self.phil_object = phil_object

        ## Some basic statistics and sanity checks follow
        if verbose > 0:
            print("Some basic intensity statistics follow.", file=out)
            print(file=out)

        basic_data_stats = data_statistics.basic_intensity_statistics(
            miller_array,
            aniso_scale_and_b.p_scale,
            aniso_scale_and_b.u_star,
            iso_scale_and_b.scat_info,
            out=out,
            out_plot=out_plot)
        self.basic_data_stats = basic_data_stats
        self.miller_array = basic_data_stats.new_miller

        #relative wilson plot
        self.rel_wilson = None
        if (miller_calc is not None) and (miller_calc.d_min() < 4.0):
            try:
                self.rel_wilson = relative_wilson.relative_wilson(
                    miller_obs=miller_array, miller_calc=miller_calc)
            except RuntimeError as e:
                print("*** Error calculating relative Wilson plot - skipping.",
                      file=out)
                print("", file=out)

        if verbose > 0:
            print("Basic analyses completed", file=out)
Example #15
  def __init__(self,
               miller_array,
               phil_object,
               out=None,
               out_plot=None, miller_calc=None,
               original_intensities=None,
               completeness_as_non_anomalous=None,
               verbose=0):
    if out is None:
      out=sys.stdout
    if verbose>0:
      print >> out
      print >> out
      print >> out, "Matthews coefficient and Solvent content statistics"
    n_copies_solc = 1.0
    self.nres_known = False
    if (phil_object.scaling.input.asu_contents.n_residues is not None or
        phil_object.scaling.input.asu_contents.n_bases is not None) :
      self.nres_known = True
      if (phil_object.scaling.input.asu_contents.sequence_file is not None) :
        print >> out, "  warning: ignoring sequence file"
    elif (phil_object.scaling.input.asu_contents.sequence_file is not None) :
      print >> out, "  determining composition from sequence file %s" % \
        phil_object.scaling.input.asu_contents.sequence_file
      seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
        file_name=phil_object.scaling.input.asu_contents.sequence_file,
        log=out)
      if (seq_comp is not None) :
        phil_object.scaling.input.asu_contents.n_residues = seq_comp.n_residues
        phil_object.scaling.input.asu_contents.n_bases = seq_comp.n_bases
        self.nres_known = True
    matthews_results =matthews.matthews_rupp(
      crystal_symmetry = miller_array,
      n_residues = phil_object.scaling.input.asu_contents.n_residues,
      n_bases = phil_object.scaling.input.asu_contents.n_bases,
      out=out,verbose=1)
    phil_object.scaling.input.asu_contents.n_residues = matthews_results[0]
    phil_object.scaling.input.asu_contents.n_bases = matthews_results[1]
    n_copies_solc = matthews_results[2]
    self.matthews_results = matthews_results

    if phil_object.scaling.input.asu_contents.n_copies_per_asu is not None:
      n_copies_solc = phil_object.scaling.input.asu_contents.n_copies_per_asu
      self.defined_copies = n_copies_solc
      if verbose>0:
        print >> out,"Number of copies per asymmetric unit provided"
        print >> out," Will use user specified value of ", n_copies_solc
    else:
      phil_object.scaling.input.asu_contents.n_copies_per_asu = n_copies_solc
      self.guessed_copies = n_copies_solc

    # first report on I over sigma
    miller_array_new = miller_array
    self.data_strength = None
    miller_array_intensities = miller_array
    if (original_intensities is not None) :
      assert original_intensities.is_xray_intensity_array()
      miller_array_intensities = original_intensities
    if miller_array_intensities.sigmas() is not None:
      data_strength=data_statistics.i_sigi_completeness_stats(
        miller_array_intensities,
        isigi_cut = phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.isigi_cut,
        completeness_cut = phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.completeness_cut,
      completeness_as_non_anomalous=completeness_as_non_anomalous)
      data_strength.show(out)
      self.data_strength = data_strength
      if phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution is None:
        if data_strength.resolution_cut > data_strength.resolution_at_least:
          phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_at_least
        else:
           phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_cut

    ## Isotropic wilson scaling
    if verbose>0:
      print >> out
      print >> out
      print >> out, "Maximum likelihood isotropic Wilson scaling "

    n_residues =  phil_object.scaling.input.asu_contents.n_residues
    n_bases = phil_object.scaling.input.asu_contents.n_bases
    if n_residues is None:
      n_residues = 0
    if n_bases is None:
      n_bases = 0
    if n_bases+n_residues==0:
      raise Sorry("No scatterers available")
    iso_scale_and_b = absolute_scaling.ml_iso_absolute_scaling(
      miller_array = miller_array_new,
      n_residues = n_residues*
      miller_array.space_group().order_z()*n_copies_solc,
      n_bases=n_bases*
      miller_array.space_group().order_z()*n_copies_solc)
    iso_scale_and_b.show(out=out,verbose=verbose)
    self.iso_scale_and_b = iso_scale_and_b
    ## Store the b and scale values from isotropic ML scaling
    self.iso_p_scale = iso_scale_and_b.p_scale
    self.iso_b_wilson =  iso_scale_and_b.b_wilson


    ## Anisotropic ml wilson scaling
    if verbose>0:
      print >> out
      print >> out
      print >> out, "Maximum likelihood anisotropic Wilson scaling "
    aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
      miller_array = miller_array_new,
      n_residues = n_residues*miller_array.space_group().order_z()*n_copies_solc,
      n_bases = n_bases*miller_array.space_group().order_z()*n_copies_solc)
    aniso_scale_and_b.show(out=out,verbose=1)

    self.aniso_scale_and_b = aniso_scale_and_b

    try: b_cart = aniso_scale_and_b.b_cart
    except AttributeError, e:
      print >> out, "*** ERROR ***"
      print >> out, str(e)
      show_exception_info_if_full_testing()
      return
Example #16
def calc_stats(xac_file, stat_choice, n_residues=None, ref_v6cell=None,
               min_peak=None, min_peak_percentile=None, correct_peak=None):
    # Open XDS_ASCII
    if xac_file.endswith(".pkl"): xac = pickle.load(open(xac_file))
    else: xac = xds_ascii.XDS_ASCII(xac_file)
    
    sel_remove = flex.bool(xac.iobs.size(), False)
    if min_peak is not None:
        sel = xac.peak < min_peak
        sel_remove |= sel
    elif min_peak_percentile is not None:
        q = numpy.percentile(xac.peak, min_peak_percentile)
        print "percentile %.2f %s" % (q, xac)
        sel = xac.peak < q
        sel_remove |= sel

    if correct_peak: sel_remove |= (xac.peak < 1) # remove PEAK==0

    xac.remove_selection(sel_remove)

    if params.correct_peak:
        xac.iobs *= xac.peak * .01
        xac.sigma_iobs *= xac.peak * .01

    iobs = xac.i_obs(anomalous_flag=False)
    iobs = iobs.select(iobs.sigmas()>0).merge_equivalents(use_internal_variance=False).array()
    
    stats = dict(filename=xac_file, cell=iobs.unit_cell().parameters())

    if iobs.size() == 0:
        return stats

    if "ioversigma" in stat_choice or "resnatsnr1" in stat_choice:
        binner = iobs.setup_binner(auto_binning=True)

        if "ioversigma" in stat_choice: stats["ioversigma"] = flex.mean(iobs.data()/iobs.sigmas())

        if "resnatsnr1" in stat_choice:
            res = float("nan")
            for i_bin in binner.range_used():
                sel = binner.selection(i_bin)
                tmp = iobs.select(sel)
                if tmp.size() == 0: continue
                sn = flex.mean(tmp.data()/tmp.sigmas())
                if sn <= 1:
                    res = binner.bin_d_range(i_bin)[1]
                    break

            stats["resnatsnr1"] = res

    if "abdist" in stat_choice:
        from cctbx.uctbx.determine_unit_cell import NCDist
        G6a, G6b = ref_v6cell, v6cell(iobs.unit_cell().niggli_cell())
        abdist = NCDist(G6a, G6b)
        stats["abdist"] = abdist

    if "wilsonb" in stat_choice:
        iso_scale_and_b = ml_iso_absolute_scaling(iobs, n_residues, 0)
        stats["wilsonb"] = iso_scale_and_b.b_wilson

    print stats
    return stats
Example #17
def calc_stats(xac_file,
               stat_choice,
               n_residues=None,
               ref_v6cell=None,
               min_peak=None,
               min_peak_percentile=None,
               correct_peak=None):
    # Open XDS_ASCII
    if xac_file.endswith(".pkl"): xac = pickle.load(open(xac_file))
    else: xac = xds_ascii.XDS_ASCII(xac_file)

    sel_remove = flex.bool(xac.iobs.size(), False)
    if min_peak is not None:
        sel = xac.peak < min_peak
        sel_remove |= sel
    elif min_peak_percentile is not None:
        q = numpy.percentile(xac.peak, min_peak_percentile)
        print "percentile %.2f %s" % (q, xac)
        sel = xac.peak < q
        sel_remove |= sel

    if correct_peak: sel_remove |= (xac.peak < 1)  # remove PEAK==0

    xac.remove_selection(sel_remove)

    if params.correct_peak:
        xac.iobs *= xac.peak * .01
        xac.sigma_iobs *= xac.peak * .01

    iobs = xac.i_obs(anomalous_flag=False)
    iobs = iobs.select(iobs.sigmas() > 0).merge_equivalents(
        use_internal_variance=False).array()

    stats = dict(filename=xac_file, cell=iobs.unit_cell().parameters())

    if iobs.size() == 0:
        return stats

    if "ioversigma" in stat_choice or "resnatsnr1" in stat_choice:
        binner = iobs.setup_binner(auto_binning=True)

        if "ioversigma" in stat_choice:
            stats["ioversigma"] = flex.mean(iobs.data() / iobs.sigmas())

        if "resnatsnr1" in stat_choice:
            res = float("nan")
            for i_bin in binner.range_used():
                sel = binner.selection(i_bin)
                tmp = iobs.select(sel)
                if tmp.size() == 0: continue
                sn = flex.mean(tmp.data() / tmp.sigmas())
                if sn <= 1:
                    res = binner.bin_d_range(i_bin)[1]
                    break

            stats["resnatsnr1"] = res

    if "abdist" in stat_choice:
        from cctbx.uctbx.determine_unit_cell import NCDist
        G6a, G6b = ref_v6cell, v6cell(iobs.unit_cell().niggli_cell())
        abdist = NCDist(G6a, G6b)
        stats["abdist"] = abdist

    if "wilsonb" in stat_choice:
        iso_scale_and_b = ml_iso_absolute_scaling(iobs, n_residues, 0)
        stats["wilsonb"] = iso_scale_and_b.b_wilson

    print stats
    return stats