def sample_sampling_diversity(sample, samdir, args=None):
    """Subsample ``sample`` and compute the requested diversity indices.

    Args:
        sample: Sample object. Its ``name`` attribute is used to look up the
            clone counts, and the object itself is handed to
            ``SampleDiversityStat.set_sample_info``.
        samdir: Directory holding the data of the original sample.
        args: Sequence of exactly three items ``(size, indices, workdir)``:
            size: Subsample size forwarded to ``libsample.sampling``, or
                ``None`` to skip sampling and use ``samdir`` directly.
            indices: Iterable of index names. ``'numclone'`` stores the
                number of count entries, ``'fisher_alpha'`` stores the first
                element of vegan's ``fisher_alpha`` result, and any other
                name is passed through to vegan's ``diversity`` function.
            workdir: Directory under which the ``subsample`` directory is
                created when sampling is requested.

    Returns:
        A ``SampleDiversityStat`` with one entry per requested index.

    Raises:
        ValueError: If ``args`` is missing or does not hold three items.
        OSError: If the subsample directory cannot be created.
    """
    if args is None or len(args) != 3:
        raise ValueError(
            "args must be a sequence of (size, indices, workdir), got %r"
            % (args,)
        )
    size, indices, workdir = args

    if size is None:
        # No sampling: compute the diversity of the original sample.
        subsampledir = samdir
    else:
        subsampledir = os.path.join(workdir, "subsample")
        # Create the directory without going through a shell so that paths
        # containing spaces or shell metacharacters are handled correctly.
        try:
            os.makedirs(subsampledir)
        except OSError:
            # An already existing directory is fine (``mkdir -p`` semantics).
            if not os.path.isdir(subsampledir):
                raise
        libsample.sampling(sample, samdir, subsampledir, (size,))

    sampling = SampleDiversityStat()
    sampling.set_sample_info(sample)
    # presumably one count per clone -- verify against sample_get_clone_attrs
    counts = sample_get_clone_attrs(sample.name, subsampledir, "count")

    # rpy2 is imported lazily so it is only required when this function is
    # actually called.
    import rpy2.robjects as robjs
    from rpy2.robjects.packages import importr

    vegan = importr("vegan")
    rcounts = robjs.IntVector(counts)

    for index in indices:
        if index == 'numclone':
            sampling[index] = len(counts)
        elif index == 'fisher_alpha':
            sampling[index] = vegan.fisher_alpha(rcounts)[0]
        else:
            # Any other name is handed to vegan as the diversity index.
            sampling[index] = vegan.diversity(rcounts, index)[0]
    return sampling
def test_sampling(self):
    """Check libsample.sampling on invalid input and on a size-7 subsample."""
    # Invalid inputs must be rejected with ValueError: a missing sample,
    # a sample without clones, and sizes 0 and 20 for the fixture sample
    # (presumably out of range for self.sample -- confirm against setUp).
    self.assertRaises(ValueError, libsample.sampling, None, [5])
    empty_sample = libsample.Sample("Empty", [])
    self.assertRaises(ValueError, libsample.sampling, empty_sample, [7])
    self.assertRaises(ValueError, libsample.sampling, self.sample, [0])
    self.assertRaises(ValueError, libsample.sampling, self.sample, [20])

    subsample = libsample.sampling(self.sample, [7])
    clones = subsample.clones

    # The subsample holds at most 4 clones and exactly 7 sequences in total.
    self.assertLessEqual(len(clones), 4)
    self.assertEqual(subsample.size, 7)
    self.assertEqual(sum(c.count for c in clones), 7)

    # Clone frequencies must add up to 1, within a 0.01 tolerance.
    total_freq = sum(c.freq for c in clones)
    self.assertLess(abs(1 - total_freq), 0.01)

    # Every clone must have a distinct nucleotide sequence.
    seqs = [c.nuc for c in clones]
    self.assertEqual(len(set(seqs)), len(seqs))

    # Each clone is non-empty and keeps the gene assignments recorded for
    # its sequence in the fixture lookups.
    for c in clones:
        self.assertGreater(c.count, 0)
        self.assertGreater(c.freq, 0)
        self.assertIn(c.nuc, self.s2v)
        self.assertEqual(c.vgenes, self.s2v[c.nuc])
        self.assertEqual(c.jgenes, self.s2j[c.nuc])