def test_export(self):
    """Exercise every output format of the 'export' command."""
    # SEG: exporting two inputs must produce more rows than exporting one
    one_sample = export.export_seg(["formats/tr95t.cns"])
    self.assertGreater(len(one_sample), 0)
    two_samples = export.export_seg(["formats/tr95t.cns",
                                     "formats/cl_seq.cns"])
    self.assertGreater(len(two_samples), len(one_sample))

    # THetA2: first without a reference, then with one
    cnr = tabio.read_cna("formats/tr95t.cns")
    rows_without_ref = export.export_theta(cnr, None)
    self.assertGreater(len(rows_without_ref), 0)
    ref = tabio.read_cna("formats/reference-tr.cnn")
    rows_with_ref = export.export_theta(cnr, ref)
    self.assertGreater(len(rows_with_ref), 0)

    # Formats that calculate absolute copy number
    abs_cn_cases = [
        ("tr95t.cns", 2, True),
        ("cl_seq.cns", 6, True),
        ("amplicon.cns", 2, False),
    ]
    # BED: the "ploidy" and "variant" settings must yield fewer rows than
    # there are segments; "all" must yield exactly as many.
    bed_checks = (
        ("ploidy", self.assertLess),
        ("variant", self.assertLess),
        ("all", self.assertEqual),
    )
    for fname, ploidy, is_f in abs_cn_cases:
        cns = tabio.read_cna("formats/" + fname)
        for bed_mode, check in bed_checks:
            bed_rows = export.export_bed(cns, ploidy, True, is_f,
                                         cns.sample_id, bed_mode)
            check(len(bed_rows), len(cns))
        # VCF: the body is non-empty but has fewer lines than segments
        _vheader, vcf_body = export.export_vcf(cns, ploidy, True, is_f)
        n_lines = len(vcf_body.splitlines())
        self.assertTrue(0 < n_lines < len(cns))
def test_ranges(self):
    """Check that by_ranges, in_range and in_ranges agree with each other."""
    all_bins = tabio.read_cna("formats/amplicon.cnr")
    all_segs = tabio.read_cna("formats/amplicon.cns")
    segs_by_chrom = dict(all_segs.by_chromosome())
    for chrom, chrom_bins in all_bins.by_chromosome():
        last_seg_idx = 0
        bins_seen = 0
        chrom_segs = segs_by_chrom[chrom]
        paired = chrom_bins.by_ranges(chrom_segs)
        for last_seg_idx, (seg, seg_bins) in enumerate(paired):
            n_seg_bins = len(seg_bins)
            bins_seen += n_seg_bins
            # The segment's probe count matches the bins paired with it
            self.assertEqual(seg.probes, n_seg_bins)
            # ...and a direct range query finds the same number of bins
            for mode in ('outer', 'trim'):
                in_seg = all_bins.in_range(seg.chromosome, seg.start,
                                           seg.end, mode=mode)
                self.assertEqual(n_seg_bins, len(in_seg))
        # Every segment was visited, and the segments cover every bin
        self.assertEqual(len(chrom_segs), last_seg_idx + 1)
        self.assertEqual(len(chrom_bins), bins_seen)
        # Multi-range queries, from the full array and from the
        # per-chromosome subset, return all of the chromosome's bins
        for mode in ("outer", "trim"):
            from_full = all_bins.in_ranges(chrom, chrom_segs['start'],
                                           chrom_segs['end'], mode=mode)
            self.assertEqual(len(chrom_bins), len(from_full))
            from_subset = chrom_bins.in_ranges(starts=chrom_segs['start'],
                                               ends=chrom_segs['end'],
                                               mode=mode)
            self.assertEqual(len(chrom_bins), len(from_subset))
def test_gainloss(self):
    """Run 'gainloss' on bins alone, then on bins together with segments."""
    bins = tabio.read_cna("formats/amplicon.cnr")
    rows_from_bins = commands.do_gainloss(bins, male_reference=True)
    self.assertGreater(len(rows_from_bins), 0)

    segments = tabio.read_cna("formats/amplicon.cns")
    rows_from_segments = commands.do_gainloss(bins, segments, 0.3, 4,
                                              male_reference=True)
    self.assertGreater(len(rows_from_segments), 0)
def test_metrics(self):
    """The 'metrics' command: every scale estimate of the residuals is positive."""
    bins = tabio.read_cna("formats/amplicon.cnr")
    segs = tabio.read_cna("formats/amplicon.cns")
    residuals = bins.residuals(segs)
    # There can be no more residuals than bins
    self.assertLessEqual(len(residuals), len(bins))
    for scale_estimate in metrics.ests_of_scale(residuals):
        self.assertGreater(scale_estimate, 0)
def test_residuals(self):
    """Check summary statistics of the residuals for each accepted argument.

    residuals() is called with None, with the segments themselves, and with
    a GenomicArray built from the segment data with extra columns dropped.
    """
    bins = tabio.read_cna("formats/amplicon.cnr")
    segments = tabio.read_cna("formats/amplicon.cns")
    regions = gary.GenomicArray(segments.data).drop_extra_columns()
    for baseline in (None, segments, regions):
        resid = bins.residuals(baseline)
        resid_mean = resid.mean()
        self.assertAlmostEqual(0, resid_mean, delta=.3)
        resid_p80 = np.percentile(resid, 80)
        self.assertAlmostEqual(1, resid_p80, delta=.2)
        resid_sd = resid.std()
        self.assertAlmostEqual(2, resid_sd, delta=.5)
def test_drop_extra_columns(self):
    """drop_extra_columns removes 'gc' and leaves 'log2' values unchanged."""
    original = tabio.read_cna('formats/reference-tr.cnn')
    self.assertIn('gc', original)
    stripped = original.drop_extra_columns()
    self.assertNotIn('gc', stripped)
    log2_unchanged = (stripped['log2'] == original['log2'])
    self.assertTrue(log2_unchanged.all())
def test_segment_parallel(self):
    """The 'segment' command gives same-shaped output with 2 processes as with 1."""
    bins = tabio.read_cna("formats/amplicon.cnr")
    parallel = segmentation.do_segmentation(bins, "haar", processes=2)
    serial = segmentation.do_segmentation(bins, "haar", processes=1)
    # Compare the table dimensions and the number of metadata entries
    self.assertEqual(parallel.data.shape, serial.data.shape)
    self.assertEqual(len(parallel.meta), len(serial.meta))
def test_by_chromosome(self):
    """The per-chromosome groups of by_chromosome together hold every row."""
    for path in ("formats/amplicon.cnr", "formats/cl_seq.cns"):
        arr = tabio.read_cna(path)
        total_rows = sum(len(chrom_rows)
                         for _chrom, chrom_rows in arr.by_chromosome())
        self.assertEqual(total_rows, len(arr))
def test_basic(self):
    """Check length, equality and item get/set on a loaded array."""
    cna = tabio.read_cna('formats/reference-tr.cnn')
    # Length: the file's line count minus one (presumably the header row)
    n_lines = linecount('formats/reference-tr.cnn')
    self.assertEqual(len(cna), n_lines - 1)
    # Equality: a second read of the same file compares equal
    same = tabio.read_cna('formats/reference-tr.cnn')
    self.assertEqual(cna, same)
    # Item access: assigning rows back onto themselves changes nothing
    first_row = cna[0]
    cna[0] = first_row
    for span in (slice(3, 4), slice(6, 10)):
        cna[span] = cna[span]
    self.assertEqual(tuple(cna[0]), tuple(same[0]))
    self.assertEqual(cna[3:6], same[3:6])
def test_segmetrics(self):
    """The 'segmetrics' command: interval bounds bracket the segment means."""
    bins = tabio.read_cna("formats/amplicon.cnr")
    segs = tabio.read_cna("formats/amplicon.cns")

    def bootstrap_ci(values):
        return metrics.confidence_interval_bootstrap(values, 0.05, 100)

    def prediction_interval(values):
        return metrics.prediction_interval(values, 0.05)

    for interval_func in (bootstrap_ci, prediction_interval):
        lower, upper = commands._segmetric_interval(segs, bins,
                                                    interval_func)
        # One lower and one upper bound per segment
        self.assertEqual(len(lower), len(segs))
        self.assertEqual(len(upper), len(segs))
        # Only segments with more than 3 probes are held to the
        # "bounds strictly bracket the mean" requirement
        enough_probes = (np.asarray(segs['probes']) > 3)
        seg_means = segs[enough_probes, 'log2']
        lower_checked = lower[enough_probes]
        upper_checked = upper[enough_probes]
        self.assertTrue((lower_checked < seg_means).all())
        self.assertTrue((seg_means < upper_checked).all())
def test_call_gender(self):
    """Run each 'call' method and check mean values on chr1, chrX and chrY."""
    # Columns: file name; is_sample_female; is_reference_male;
    #          expected mean log2 on chr1, chrX, chrY;
    #          expected mean copy number on chr1, chrX, chrY.
    cases = (
        ("formats/f-on-f.cns", True, False, 0, 0, None, 2, 2, None),
        ("formats/f-on-m.cns", True, True, 0.585, 1, None, 3, 2, None),
        ("formats/m-on-f.cns", False, False, 0, -1, 0, 2, 1, 1),
        ("formats/m-on-m.cns", False, True, 0, 0, 0, 2, 1, 1),
    )
    # The three ways do_call is invoked for every file
    call_variants = (
        ("threshold", {}),              # Call threshold
        ("clonal", {}),                 # Call clonal pure
        ("clonal", {"purity": 0.99}),   # Call clonal barely-mixed
    )
    for (fname, sample_is_f, ref_is_m,
         chr1_expect, chrx_expect, chry_expect,
         chr1_cn, chrx_cn, chry_cn) in cases:
        cns = tabio.read_cna(fname)
        # (expected copy number, expected log2, row mask) per chromosome
        per_chrom = [
            (chr1_cn, chr1_expect, (cns.chromosome == 'chr1')),
            (chrx_cn, chrx_expect, (cns.chromosome == 'chrX')),
        ]
        chry_idx = (cns.chromosome == 'chrY')
        if not sample_is_f:
            # chrY is only checked when the sample is not female
            per_chrom.append((chry_cn, chry_expect, chry_idx))

        for method, extra_kwargs in call_variants:
            called = commands.do_call(cns, None, method,
                                      is_reference_male=ref_is_m,
                                      is_sample_female=sample_is_f,
                                      **extra_kwargs)
            for want_cn, want_log2, mask in per_chrom:
                self.assertEqual(want_cn, called['cn'][mask].mean())
                # log2 means are compared after rounding to 0 places
                self.assertAlmostEqual(want_log2,
                                       called['log2'][mask].mean(),
                                       places=0)
def test_call(self):
    """The 'call' command: every method returns as many rows as it was given."""
    # Methods: clonal, threshold, none
    sex_kwargs = dict(is_reference_male=True, is_sample_female=True)

    tr_cns = tabio.read_cna("formats/tr95t.cns")
    tr_thresh = commands.do_call(tr_cns, None, "threshold", **sex_kwargs)
    self.assertEqual(len(tr_cns), len(tr_thresh))
    tr_clonal = commands.do_call(tr_cns, None, "clonal", purity=.65,
                                 **sex_kwargs)
    self.assertEqual(len(tr_cns), len(tr_clonal))

    cl_cns = tabio.read_cna("formats/cl_seq.cns")
    # Custom thresholds: log2 of (k + 0.5) / 6 for k = 0..11
    cl_thresholds = np.log2((np.arange(12) + .5) / 6.)
    cl_thresh = commands.do_call(cl_cns, None, "threshold",
                                 thresholds=cl_thresholds, **sex_kwargs)
    self.assertEqual(len(cl_cns), len(cl_thresh))
    cl_clonal = commands.do_call(cl_cns, None, "clonal", ploidy=6,
                                 purity=.99, **sex_kwargs)
    self.assertEqual(len(cl_cns), len(cl_clonal))
    cl_none = commands.do_call(cl_cns, None, "none", ploidy=6,
                               purity=.99, **sex_kwargs)
    self.assertEqual(len(cl_cns), len(cl_none))
def test_segment(self):
    """The 'segment' command using the "haar" method."""
    # NOTE(review): a second test_segment is defined later in this file; if
    # both live in the same class, that one replaces this one -- confirm.
    bins = tabio.read_cna("formats/amplicon.cnr")
    # R methods are in another script
    default_segs = segmentation.do_segmentation(bins, "haar")
    self.assertGreater(len(default_segs), 0)
    tuned_segs = segmentation.do_segmentation(bins, "haar",
                                              threshold=.001,
                                              skip_low=True)
    self.assertGreater(len(tuned_segs), 0)
def test_segment(self):
    """The 'segment' command using "haar": defaults, tuned options, variants."""
    # NOTE(review): an earlier test_segment is defined in this file; if both
    # live in the same class, this definition replaces it -- confirm.
    bins = tabio.read_cna("formats/amplicon.cnr")
    # NB: R methods are in another script; haar is pure-Python
    default_segs = segmentation.do_segmentation(bins, "haar")
    self.assertGreater(len(default_segs), 0)
    tuned_segs = segmentation.do_segmentation(bins, "haar",
                                              threshold=.0001,
                                              skip_low=True)
    self.assertGreater(len(tuned_segs), 0)
    # Segmentation also accepts variants read from a VCF
    variants = tabio.read("formats/na12878_na12882_mix.vcf", "vcf")
    variant_segs = segmentation.do_segmentation(bins, "haar",
                                                variants=variants)
    self.assertGreater(len(variant_segs), 0)
def test_fix(self):
    """The 'fix' command, with and without antitarget bins.

    Fake target and antitarget inputs are made by splitting the combined
    reference on its "Background" gene label and adding Gaussian noise
    (standard normal draws divided by 5) to the log2 values. The noise comes
    from a fixed-seed generator so every run sees the same inputs and any
    failure can be reproduced.
    """
    # Private seeded generator: deterministic, and it leaves NumPy's global
    # random state untouched for other tests.
    rng = np.random.RandomState(0)

    # Extract fake target/antitarget bins from a combined file
    ref = tabio.read_cna('formats/reference-tr.cnn')
    is_bg = (ref["gene"] == "Background")
    tgt_bins = ref[~is_bg]
    tgt_bins.log2 += rng.randn(len(tgt_bins)) / 5
    anti_bins = ref[is_bg]
    anti_bins.log2 += rng.randn(len(anti_bins)) / 5
    blank_bins = cnary.CopyNumArray([])

    # Typical usage (hybrid capture)
    cnr = commands.do_fix(tgt_bins, anti_bins, ref)
    self.assertTrue(0 < len(cnr) <= len(ref))

    # Blank antitargets (WGS or amplicon)
    cnr = commands.do_fix(tgt_bins, blank_bins, ref[~is_bg])
    self.assertTrue(0 < len(cnr) <= len(tgt_bins))
def test_guess_xx(self):
    """Check guess_xx against the known answer for each example file."""
    # (file name, expected guess_xx result, flag passed to guess_xx)
    cases = (
        ("formats/f-on-f.cns", True, False),
        ("formats/f-on-m.cns", True, True),
        ("formats/m-on-f.cns", False, False),
        ("formats/m-on-m.cns", False, True),
        ("formats/amplicon.cnr", False, True),
        ("formats/cl_seq.cns", True, True),
        ("formats/tr95t.cns", True, True),
        ("formats/reference-tr.cnn", False, False),
    )
    for fname, sample_is_f, ref_is_m in cases:
        cnarr = tabio.read_cna(fname)
        guess = cnarr.guess_xx(ref_is_m)
        failure_note = ("%s: guessed XX %s but is %s"
                        % (fname, guess, sample_is_f))
        self.assertEqual(guess, sample_is_f, failure_note)
def test_gender(self):
    """Guess chromosomal gender from chrX log2 ratio value.

    Each case is (file name, expected guess_xx result, flag passed to
    guess_xx). The guess is computed once per file, and the case details
    are reported through the assertion message on failure rather than
    printed to stdout.
    """
    for (fname, sample_is_f, ref_is_m) in (
        ("formats/f-on-f.cns", True, False),
        ("formats/f-on-m.cns", True, True),
        ("formats/m-on-f.cns", False, False),
        ("formats/m-on-m.cns", False, True),
        ("formats/amplicon.cnr", False, True),
        ("formats/cl_seq.cns", True, True),
        ("formats/tr95t.cns", True, True),
        ("formats/reference-tr.cnn", False, False),
    ):
        cnarr = tabio.read_cna(fname)
        # Guess once; the same value is both checked and reported
        guess = cnarr.guess_xx(ref_is_m)
        self.assertEqual(sample_is_f, guess,
                         "Gender issue: %s %s %s"
                         % (fname, sample_is_f, ref_is_m))
def test_center_all(self):
    """Check center_all with its default and with each named method."""
    cna = tabio.read_cna('formats/reference-tr.cnn')
    chr1 = cna.in_range('chr1')

    # Median-centering an already median-centered array -> no change
    self.assertAlmostEqual(0, np.median(chr1['log2']), places=1)
    chr1.center_all()
    centered_median = np.median(chr1['log2'])
    self.assertAlmostEqual(0, centered_median)

    # Median-centering resets a shift away from the median
    shifted = chr1.copy()
    shifted['log2'] += 2.0
    shifted.center_all()
    self.assertAlmostEqual(np.median(shifted['log2']), centered_median)

    # Other methods for centering are similar for a CN-neutral chromosome
    for method in ("mean", "mode", "biweight"):
        recentered = chr1.copy()
        recentered.center_all(method)
        drift = abs(recentered['log2'].median() - centered_median)
        self.assertLess(drift, 0.1)
def test_breaks(self):
    """The 'breaks' command returns at least one row for the amplicon example."""
    bins = tabio.read_cna("formats/amplicon.cnr")
    segments = tabio.read_cna("formats/amplicon.cns")
    break_rows = commands.do_breaks(bins, segments, 4)
    self.assertGreater(len(break_rows), 0)
def setUp(self):
    """Fixture hook: load the reference-tr.cnn example into self.ex_cnr."""
    example = tabio.read_cna('formats/reference-tr.cnn')
    self.ex_cnr = example
def test_import_theta(self):
    """The 'import-theta' command: each result is non-empty and no longer than the input."""
    original = tabio.read_cna("formats/nv3.cns")
    results_path = "formats/nv3.n3.results"
    for imported in commands.do_import_theta(original, results_path):
        n_imported = len(imported)
        self.assertTrue(0 < n_imported <= len(original))
def test_empty(self):
    """Reading the "formats/empty" file gives an array of length zero."""
    empty_arr = tabio.read_cna("formats/empty")
    n_rows = len(empty_arr)
    self.assertEqual(n_rows, 0)