Exemple #1
0
def sample_sampling_diversity(sample, samdir, args=None):
    """Optionally subsample *sample*, then compute its diversity indices.

    Args:
        sample: Sample object. Its ``name`` attribute is read here, and the
            object is handed to ``libsample.sampling`` and to
            ``SampleDiversityStat.set_sample_info``.
        samdir: Location of the original sample's data. It is passed to
            ``libsample.sampling`` as the input, or read directly when no
            subsampling is requested.
        args: Sequence of exactly three items ``(size, indices, workdir)``:
            size -- subsample size, or None to analyse the original sample;
            indices -- iterable of index names. 'numclone' and
                'fisher_alpha' are handled specially; any other name is
                passed to vegan's ``diversity`` as its index argument;
            workdir -- directory under which the "subsample" directory is
                created (unused when size is None).

    Returns:
        A SampleDiversityStat holding one entry per requested index.

    Raises:
        ValueError: If args is None or does not contain exactly three items.
    """
    # Explicit check instead of `assert`: asserts vanish under `python -O`,
    # and the default args=None used to fail with an opaque TypeError.
    if args is None or len(args) != 3:
        raise ValueError(
            "args must be a (size, indices, workdir) triple, got %r" % (args,))
    size, indices, workdir = args

    if size is None:  # no sampling, compute diversity of original sample
        subsampledir = samdir
    else:
        subsampledir = os.path.join(workdir, "subsample")
        # NOTE(review): the path is interpolated into a shell command
        # unquoted; confirm workdir can never contain spaces or shell
        # metacharacters.
        system("mkdir -p %s" % subsampledir)
        libsample.sampling(sample, samdir, subsampledir, (size,))

    sampling = SampleDiversityStat()
    sampling.set_sample_info(sample)
    counts = sample_get_clone_attrs(sample.name, subsampledir, "count")

    # Imported inside the function, as in the original, so rpy2 is only
    # needed when this function actually runs.
    import rpy2.robjects as robjs
    from rpy2.robjects.packages import importr
    vegan = importr("vegan")
    rcounts = robjs.IntVector(counts)
    for index in indices:
        if index == 'numclone':
            # Number of clones is simply the number of count entries.
            sampling[index] = len(counts)
        elif index == 'fisher_alpha':
            # R results come back as vectors; keep the first element.
            rfisher = vegan.fisher_alpha(rcounts)
            sampling[index] = rfisher[0]
        else:
            rval = vegan.diversity(rcounts, index)
            sampling[index] = rval[0]
    return sampling
    def test_sampling(self):
        """Check libsample.sampling input validation and subsample invariants."""
        # Invalid inputs must be rejected with ValueError: a None sample, a
        # sample with no clones, and two sizes (0 and 20) that the fixture
        # sample cannot satisfy (presumably 20 exceeds its total count --
        # confirm against setUp).
        self.assertRaises(ValueError, libsample.sampling, None, [5])
        empty_sample = libsample.Sample("Empty", [])
        self.assertRaises(ValueError, libsample.sampling, empty_sample, [7])
        self.assertRaises(ValueError, libsample.sampling, self.sample, [0])
        self.assertRaises(ValueError, libsample.sampling, self.sample, [20])

        # A valid request: size 7, at most 4 clones, counts summing to the
        # requested size and frequencies summing to ~1.
        subsample = libsample.sampling(self.sample, [7])
        self.assertLessEqual(len(subsample.clones), 4)
        self.assertEqual(subsample.size, 7)
        self.assertEqual(sum(c.count for c in subsample.clones), 7)
        total_freq = sum(c.freq for c in subsample.clones)
        self.assertLess(abs(1 - total_freq), 0.01)

        # Every clone in the subsample must have a distinct sequence.
        seqs = [c.nuc for c in subsample.clones]
        self.assertEqual(len(set(seqs)), len(seqs))

        # Each clone must be non-empty and keep the gene assignments recorded
        # for its sequence in the fixture lookups self.s2v / self.s2j.
        for c in subsample.clones:
            self.assertGreater(c.count, 0)
            self.assertGreater(c.freq, 0)
            self.assertIn(c.nuc, self.s2v)
            self.assertEqual(c.vgenes, self.s2v[c.nuc])
            self.assertEqual(c.jgenes, self.s2j[c.nuc])