Code example #1
import numpy
from mmtbx.scaling import absolute_scaling
from mmtbx.scaling.matthews import p_vm_calculator
# Note: axis_label() is a helper defined elsewhere in the same module (not shown here).

def calc_principal_vectors(miller_array, log_out):
    """
    Reference: Aimless program
    Triclinic & Monoclinic: from B_cart
    Orthorhombic: along a*, b*, c*
    Trigonal, Hexagonal, Tetragonal: c* and a*b*-plane
    Cubic: no anisotropy

    return type: list of (vector, label, in-plane or not)
    """

    n_residues = p_vm_calculator(miller_array, 1, 0).best_guess

    aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(miller_array=miller_array,
                                                                   n_residues=n_residues,
                                                                   n_bases=0)

    if aniso_scale_and_b.eigen_values[0] == 0:
        print >>log_out, "Error! Cannot determine B_cart"
        return

    b_cart = aniso_scale_and_b.b_cart
    print >>log_out, """ML estimate of overall B_cart:
 /  %5.2f %5.2f %5.2f \\
 |  %11.2f %5.2f |
 \\  %17.2f /
""" % (b_cart[0], b_cart[3], b_cart[4],
       b_cart[1], b_cart[5], b_cart[2])

    ev = aniso_scale_and_b.eigen_vectors

    orth_mat = numpy.array(miller_array.unit_cell().orthogonalization_matrix()).reshape(3,3)

    print >>log_out, "Eigenvalues/vectors:"
    for i, eg in enumerate(aniso_scale_and_b.eigen_values):
        v = ev[3*i:3*(i+1)]
        vs = ", ".join(map(lambda x: "% .4f"%x, v))
        vl = axis_label(v, orth_mat)
        print >>log_out, " %8.3f (%s) %s" % (eg, vs, vl)
    cs = miller_array.space_group().crystal_system()

    if cs == "Cubic":
        print >>log_out, "No anisotropy in this symmetry."
        return []
    elif cs == "Orthorhombic":
        return ([1.,0.,0.], "a*", False), ([0.,1.,0.], "b*", False), ([0.,0.,1.], "c*", False)
    elif cs in ("Triclinic", "Monoclinic"):
        return ((ev[:3], axis_label(ev[:3], orth_mat), False),
                (ev[3:6], axis_label(ev[3:6], orth_mat), False),
                (ev[6:], axis_label(ev[6:], orth_mat), False))
    else: # "Tetragonal", "Hexagonal", "Trigonal" (Cubic already returned above)
        return ([0.,0.,1.], "c*", False), ([0.,0.,1.], "a*b*", True)
Code example #2
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(
                    data=arr.data() / normaliser.normalizer_for_miller_array,
                    sigmas=arr.sigmas() /
                    normaliser.normalizer_for_miller_array)
        # Prep
        args = []
        for i in xrange(len(self.arrays) - 1):
            for j in xrange(i + 1, len(self.arrays)):
                args.append((i, j))

        # Calc all CC
        if self.use_sfdist:
            worker = lambda x: calc_sfdist(self.arrays.values()[x[0]],
                                           self.arrays.values()[x[1]])
        else:
            worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                                       self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i, j), (cc, nref) in zip(args, results):
            cc_data_for_html.append((i, j, cc, nref))
            if cc == cc: continue  # cc != cc only when cc is NaN
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i, j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x: x[1])
        remove_idxes = set()

        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes,
                           xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst" % prefix, "w").write("\n".join(
                map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i, j), (cc, nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc  # fill the lower triangle only; as.dist() in the R script reads the lower triangle

        open("%s.matrix" % prefix,
             "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

        ofs = open("%s.dat" % prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i, j), (cc, nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

        open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

        call(cmd="Rscript",
             arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int, ids)]
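The method above writes the pairwise CC values to cctable.matrix and delegates the actual hierarchical clustering to the generated R script (hclust on 1 - cc with Ward linkage). As an illustration only, not the code path used here, a comparable clustering step could be done directly in Python with scipy; cc_mat stands for a hypothetical symmetric matrix of the pairwise CCs.

import numpy
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import linkage, fcluster

# cc_mat: hypothetical (n x n) symmetric matrix of pairwise CCs, diagonal = 1
dist = 1.0 - cc_mat                          # same distance definition as the R script
numpy.fill_diagonal(dist, 0.0)
condensed = squareform(dist, checks=False)   # condensed form expected by linkage()
z = linkage(condensed, method="ward")
clusters = fcluster(z, t=0.4, criterion="distance")  # arbitrary cut height

Note that R's hclust(method="ward") corresponds to the older ward.D criterion, so the trees produced by the two routes are not guaranteed to be identical.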
Code example #3
def run(hklin, pdbin, wdir, anisotropy_correction=False):
    arrays = iotbx.file_reader.any_file(hklin).file_server.miller_arrays
    i_arrays = filter(
        lambda x: x.is_xray_intensity_array() and x.anomalous_flag(), arrays)
    f_arrays = filter(
        lambda x: x.is_xray_amplitude_array() and x.anomalous_flag(), arrays)

    if not i_arrays and not f_arrays:
        print "No anomalous observation data"
        return

    if os.path.exists(wdir):
        print "%s already exists. quiting." % wdir
        return

    os.mkdir(wdir)

    xs = crystal_symmetry_from_any.extract_from(pdbin)

    sh_out = open(os.path.join(wdir, "run_anode.sh"), "w")
    sh_out.write("#!/bin/sh\n\n")
    sh_out.write("shelxc anode <<+ > shelxc.log 2>&1\n")
    sh_out.write("cell %s\n" % format_unit_cell(xs.unit_cell()))
    sh_out.write("spag %s\n" % str(xs.space_group_info()).replace(" ", ""))

    if i_arrays:
        obs_array = i_arrays[0]
        infile = "%s.hkl" % os.path.splitext(os.path.basename(hklin))[0]
        in_opt = "%s" % infile
        print "Using intensity array:", obs_array.info().label_string()
    else:
        obs_array = f_arrays[0]
        infile = "%s_f.hkl" % os.path.splitext(os.path.basename(hklin))[0]
        in_opt = "-f %s" % infile
        print "No intensity arrays. Using amplitude arrays instead:", obs_array.info(
        ).label_string()

    sh_out.write("! data from %s : %s\n" %
                 (os.path.abspath(hklin), obs_array.info().label_string()))
    obs_array.crystal_symmetry().show_summary(sh_out, prefix="! ")
    check_symm(obs_array.crystal_symmetry(), xs)

    n_org = obs_array.size()
    obs_array = obs_array.eliminate_sys_absent()
    n_sys_abs = n_org - obs_array.size()
    if n_sys_abs > 0:
        print "  %d systematic absences removed." % n_sys_abs

    if anisotropy_correction:
        print "Correcting anisotropy.."
        n_residues = p_vm_calculator(obs_array, 1, 0).best_guess
        abss = ml_aniso_absolute_scaling(obs_array, n_residues=n_residues)
        abss.show()
        tmp = -2. if i_arrays else -1.
        b_cart = map(lambda x: x * tmp, abss.b_cart)
        obs_array = obs_array.apply_debye_waller_factors(b_cart=b_cart)

    sh_out.write("sad %s\n" % in_opt)
    iotbx.shelx.hklf.miller_array_export_as_shelx_hklf(
        obs_array,
        open(os.path.join(wdir, infile), "w"),
        normalise_if_format_overflow=True)
    sh_out.write("+\n\n")
    sh_out.write('ln -s "%s" anode.pdb\n\n' % os.path.relpath(pdbin, wdir))
    sh_out.write("anode anode\n")
    sh_out.close()

    call(cmd="sh", arg="./run_anode.sh", wdir=wdir)

    pha_file = os.path.join(wdir, "anode.pha")
    if os.path.isfile(pha_file):
        pha2mtz(pha_file, xs, os.path.join(wdir, "anode.pha.mtz"))

    print "Done. See %s/" % wdir

    fa_file = os.path.join(wdir, "anode_fa.hkl")
    if os.path.isfile(fa_file):
        r = iotbx.shelx.hklf.reader(open(fa_file))
        fa_array = r.as_miller_arrays(crystal_symmetry=xs)[0]
        print "\nData stats:"
        print " # Cmpl.o = Anomalous completeness in original data"
        print " # Cmpl.c = Anomalous completeness in shelxc result (rejections)"
        print " # SigAno = <d''/sigma> in shelxc result"
        print " d_max d_min Cmpl.o Cmpl.c SigAno"
        binner = obs_array.setup_binner(n_bins=12)
        for i_bin in binner.range_used():
            d_max_bin, d_min_bin = binner.bin_d_range(i_bin)
            obs_sel = obs_array.resolution_filter(d_max_bin, d_min_bin)
            obs_sel_ano = obs_sel.anomalous_differences()
            fa_sel = fa_array.resolution_filter(d_max_bin, d_min_bin)
            cmplset = obs_sel_ano.complete_set(
                d_max=d_max_bin, d_min=d_min_bin).select_acentric()
            n_acentric = cmplset.size()
            sigano = flex.mean(
                fa_sel.data() /
                fa_sel.sigmas()) if fa_sel.size() else float("nan")
            print " %5.2f %5.2f %6.2f %6.2f %6.2f" % (
                d_max_bin, d_min_bin, 100. * obs_sel_ano.size() / n_acentric,
                100. * fa_sel.size() / n_acentric, sigano)

    lsa_file = os.path.join(wdir, "anode.lsa")
    if os.path.isfile(lsa_file):
        print ""
        flag = False
        for l in open(lsa_file):
            if "Strongest unique anomalous peaks" in l:
                flag = True
            elif "Reflections written to" in l:
                flag = False
            if flag:
                print l.rstrip()

    res_file = os.path.join(wdir, "anode_fa.res")
    if os.path.isfile(res_file):
        x = iotbx.shelx.cctbx_xray_structure_from(file=open(res_file))
        open(os.path.join(wdir, "anode_fa.pdb"), "w").write(x.as_pdb_file())
Code example #4
File: cc_clustering.py  Project: YonekuraLab/yamtbx
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      cluster_method="ward",
                      distance_eqn="sqrt(1-cc)",
                      min_common_refs=3,
                      html_maker=None):
        """
        Using correlation as distance metric (for hierarchical clustering)
        https://stats.stackexchange.com/questions/165194/using-correlation-as-distance-metric-for-hierarchical-clustering

        Correlation "Distances" and Hierarchical Clustering
        http://research.stowers.org/mcm/efg/R/Visualization/cor-cluster/index.htm
        """

        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        distance_eqns = {
            "sqrt(1-cc)": lambda x: numpy.sqrt(1. - x),
            "1-cc": lambda x: 1. - x,
            "sqrt(1-cc^2)": lambda x: numpy.sqrt(1. - x**2),
        }
        cc_to_distance = distance_eqns[
            distance_eqn]  # raises KeyError for unknown options
        assert cluster_method in ("single", "complete", "average", "weighted",
                                  "centroid", "median", "ward"
                                  )  # available methods in scipy

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            failed = {}
            for f in self.arrays:
                arr = self.arrays[f]
                try:
                    normaliser = kernel_normalisation(arr, auto_kernel=True)
                    self.arrays[f] = arr.customized_copy(
                        data=arr.data() /
                        normaliser.normalizer_for_miller_array,
                        sigmas=arr.sigmas() /
                        normaliser.normalizer_for_miller_array)
                except Exception, e:
                    failed.setdefault(e.message, []).append(f)

            if failed:
                msg = ""
                for r in failed:
                    msg += " %s\n%s\n" % (r, "\n".join(
                        map(lambda x: "  %s" % x, failed[r])))
                raise Sorry(
                    "intensity normalization failed by following reason(s):\n%s"
                    % msg)
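The three distance_eqn options map a correlation coefficient to a non-negative distance in slightly different ways; a small numerical illustration (numpy assumed, values rounded):

import numpy

cc = 0.90
print("sqrt(1-cc)   = %.4f" % numpy.sqrt(1. - cc))     # 0.3162
print("1-cc         = %.4f" % (1. - cc))               # 0.1000
print("sqrt(1-cc^2) = %.4f" % numpy.sqrt(1. - cc**2))  # 0.4359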
Code example #5
File: cc_clustering.py  Project: harumome/kamo
    def do_clustering(self, nproc=1, b_scale=False, use_normalized=False, html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor 
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling
            
            ofs_wilson = open("%s_wilson_scales.dat"%prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0: # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices())/4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data()*tmp,
                                                         sigmas=arr.sigmas()*tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(data=arr.data()/normaliser.normalizer_for_miller_array,
                                                     sigmas=arr.sigmas()/normaliser.normalizer_for_miller_array)
        # Prep 
        args = []
        for i in xrange(len(self.arrays)-1):
            for j in xrange(i+1, len(self.arrays)):
                args.append((i,j))
           
        # Calc all CC
        worker = lambda x: calc_cc(self.arrays.values()[x[0]], self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i,j), (cc,nref) in zip(args, results):
            cc_data_for_html.append((i,j,cc,nref))
            if cc==cc: continue  # cc != cc only when cc is NaN
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i,j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x:x[1])
        remove_idxes = set()
        
        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst"%prefix, "w").write("\n".join(map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i,j), (cc,nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc
            
        open("%s.matrix"%prefix, "w").write(" ".join(map(lambda x:"%.4f"%x, mat.flatten())))

        ofs = open("%s.dat"%prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i,j), (cc,nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i,j,cc,nref))

        open("%s_ana.R"%prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d"%(x+1), org2now.keys()))))

        call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int,ids)]
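After the method returns, self.clusters maps each cluster number to [merge height, list of dataset IDs] parsed from CLUSTERS.txt (the IDs are the 1-based indices assigned via hclabels in the R script). A consumption sketch, where cc_clust stands for a hypothetical instance of this class:

cc_clust.do_clustering(nproc=4, b_scale=True)
for clid in sorted(cc_clust.clusters):
    height, ids = cc_clust.clusters[clid]
    print("cluster %04d  height=%.3f  n_datasets=%d" % (clid, height, len(ids)))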