Ejemplo n.º 1
0
 def test_export(self):
     """Exercise the 'export' command once for each output format."""
     tr_path = "formats/tr95t.cns"
     cl_path = "formats/cl_seq.cns"
     # SEG: exporting two inputs must give more rows than exporting one
     single_seg = export.export_seg([tr_path])
     self.assertGreater(len(single_seg), 0)
     double_seg = export.export_seg([tr_path, cl_path])
     self.assertGreater(len(double_seg), len(single_seg))
     # THetA2: first without a reference, then with one
     tr_cna = tabio.read_cna(tr_path)
     self.assertGreater(len(export.export_theta(tr_cna, None)), 0)
     reference = tabio.read_cna("formats/reference-tr.cnn")
     self.assertGreater(len(export.export_theta(tr_cna, reference)), 0)
     # Formats that calculate absolute copy number
     cases = (
         ("tr95t.cns", 2, True),
         ("cl_seq.cns", 6, True),
         ("amplicon.cns", 2, False),
     )
     # Last export_bed argument -> assertion relating the exported row
     # count to the number of input segments.
     bed_checks = (
         ("ploidy", self.assertLess),
         ("variant", self.assertLess),
         ("all", self.assertEqual),
     )
     for fname, ploidy, is_f in cases:
         cns = tabio.read_cna("formats/" + fname)
         # BED
         for show, compare in bed_checks:
             bed_rows = export.export_bed(cns, ploidy, True, is_f,
                                          cns.sample_id, show)
             compare(len(bed_rows), len(cns))
         # VCF: a non-empty body with fewer lines than input segments
         _vheader, vcf_body = export.export_vcf(cns, ploidy, True, is_f)
         vcf_line_count = len(vcf_body.splitlines())
         self.assertTrue(0 < vcf_line_count < len(cns))
Ejemplo n.º 2
0
 def test_ranges(self):
     """Test range methods: by_ranges, in_range, in_ranges.

     For every chromosome, the bins grouped under each segment must match
     the segment's 'probes' value and the number of bins in_range selects
     for that segment. The segments together must also account for every
     bin on the chromosome, in both 'outer' and 'trim' modes.
     """
     cnarr = tabio.read_cna("formats/amplicon.cnr")
     segarr = tabio.read_cna("formats/amplicon.cns")
     chrom_segarr = dict(segarr.by_chromosome())
     modes = ("outer", "trim")
     for chrom, subarr in cnarr.by_chromosome():
         subsegarr = chrom_segarr[chrom]
         count_segs = 0
         count_bins = 0
         # Count from 1 so that an iteration yielding nothing leaves the
         # counter at 0; a zero-based index plus one would make "no
         # segments seen" look the same as "one segment seen".
         seg_bin_pairs = subarr.by_ranges(subsegarr)
         for count_segs, (seg, bins) in enumerate(seg_bin_pairs, start=1):
             count_bins += len(bins)
             self.assertEqual(seg.probes, len(bins))
             for mode in modes:
                 self.assertEqual(len(bins), len(
                     cnarr.in_range(seg.chromosome, seg.start, seg.end,
                                    mode=mode)))
         self.assertEqual(len(subsegarr), count_segs)
         self.assertEqual(len(subarr), count_bins)
         # Selecting by all segment ranges at once must return every bin,
         # whether queried on the whole array or the per-chromosome subset.
         for mode in modes:
             self.assertEqual(len(subarr), len(
                 cnarr.in_ranges(chrom, subsegarr['start'], subsegarr['end'],
                                 mode=mode)))
             self.assertEqual(len(subarr), len(
                 subarr.in_ranges(starts=subsegarr['start'],
                                  ends=subsegarr['end'], mode=mode)))
Ejemplo n.º 3
0
 def test_gainloss(self):
     """Run 'gainloss' on bins alone, then on bins together with segments."""
     bins = tabio.read_cna("formats/amplicon.cnr")
     bin_level_rows = commands.do_gainloss(bins, male_reference=True)
     self.assertGreater(len(bin_level_rows), 0)
     segments = tabio.read_cna("formats/amplicon.cns")
     seg_level_rows = commands.do_gainloss(bins, segments, 0.3, 4,
                                           male_reference=True)
     self.assertGreater(len(seg_level_rows), 0)
Ejemplo n.º 4
0
 def test_metrics(self):
     """Check that every 'metrics' estimate of scale is positive."""
     bins = tabio.read_cna("formats/amplicon.cnr")
     segs = tabio.read_cna("formats/amplicon.cns")
     deviations = bins.residuals(segs)
     # There can be no more residuals than there are bins
     self.assertLessEqual(len(deviations), len(bins))
     for estimate in metrics.ests_of_scale(deviations):
         self.assertGreater(estimate, 0)
Ejemplo n.º 5
0
 def test_residuals(self):
     """Check residual statistics for each kind of 'residuals' argument.

     The argument is, in turn, None, the segments array, and a plain
     genomic array built from the segment data with extra columns dropped.
     """
     bins = tabio.read_cna("formats/amplicon.cnr")
     segs = tabio.read_cna("formats/amplicon.cns")
     plain_segs = gary.GenomicArray(segs.data)
     regions = plain_segs.drop_extra_columns()
     for baseline in (None, segs, regions):
         deviations = bins.residuals(baseline)
         self.assertAlmostEqual(0, deviations.mean(), delta=.3)
         self.assertAlmostEqual(1, np.percentile(deviations, 80), delta=.2)
         self.assertAlmostEqual(2, deviations.std(), delta=.5)
Ejemplo n.º 6
0
 def test_drop_extra_columns(self):
     """Dropping extra columns removes 'gc' and leaves 'log2' unchanged."""
     full = tabio.read_cna('formats/reference-tr.cnn')
     self.assertIn('gc', full)
     slim = full.drop_extra_columns()
     self.assertNotIn('gc', slim)
     log2_unchanged = (slim['log2'] == full['log2']).all()
     self.assertTrue(log2_unchanged)
Ejemplo n.º 7
0
 def test_segment_parallel(self):
     """Compare 'segment' output sizes between 2 processes and 1 process."""
     bins = tabio.read_cna("formats/amplicon.cnr")
     parallel = segmentation.do_segmentation(bins, "haar", processes=2)
     serial = segmentation.do_segmentation(bins, "haar", processes=1)
     # Only the table shape and the metadata size are compared
     self.assertEqual(parallel.data.shape, serial.data.shape)
     self.assertEqual(len(parallel.meta), len(serial.meta))
Ejemplo n.º 8
0
 def test_by_chromosome(self):
     """Row counts of the per-chromosome groups sum to the array length."""
     for path in ("formats/amplicon.cnr", "formats/cl_seq.cns"):
         arr = tabio.read_cna(path)
         grouped_total = sum(len(rows)
                             for _chrom, rows in arr.by_chromosome())
         self.assertEqual(grouped_total, len(arr))
Ejemplo n.º 9
0
 def test_basic(self):
     """Test basic container functionality and magic methods."""
     path = 'formats/reference-tr.cnn'
     cna = tabio.read_cna(path)
     # Length: one less than the file's line count
     self.assertEqual(len(cna), linecount(path) - 1)
     # Equality: a second read of the same file compares equal
     twin = tabio.read_cna(path)
     self.assertEqual(cna, twin)
     # Item access: writing back what was just read must change nothing
     first_row = cna[0]
     cna[0] = first_row
     for window in (slice(3, 4), slice(6, 10)):
         cna[window] = cna[window]
     self.assertEqual(tuple(cna[0]), tuple(twin[0]))
     self.assertEqual(cna[3:6], twin[3:6])
Ejemplo n.º 10
0
 def test_segmetrics(self):
     """The 'segmetrics' command: interval bounds bracket segment means."""
     bins = tabio.read_cna("formats/amplicon.cnr")
     segs = tabio.read_cna("formats/amplicon.cns")

     def bootstrap_ci(values):
         return metrics.confidence_interval_bootstrap(values, 0.05, 100)

     def prediction_interval(values):
         return metrics.prediction_interval(values, 0.05)

     for interval_func in (bootstrap_ci, prediction_interval):
         lower, upper = commands._segmetric_interval(segs, bins,
                                                     interval_func)
         # One lower and one upper bound per segment
         self.assertEqual(len(lower), len(segs))
         self.assertEqual(len(upper), len(segs))
         # Only segments with more than 3 probes are checked for ordering
         enough_probes = (np.asarray(segs['probes']) > 3)
         seg_means = segs[enough_probes, 'log2']
         kept_lower = lower[enough_probes]
         kept_upper = upper[enough_probes]
         self.assertTrue((kept_lower < seg_means).all())
         self.assertTrue((seg_means < kept_upper).all())
Ejemplo n.º 11
0
    def test_call_gender(self):
        """Test each 'call' method on allosomes."""
        # Columns: file name, sample_is_f, ref_is_m,
        # expected mean log2 on chr1 / chrX / chrY,
        # expected mean 'cn' on chr1 / chrX / chrY.
        scenarios = (
            ("formats/f-on-f.cns", True, False, 0, 0, None, 2, 2, None),
            ("formats/f-on-m.cns", True, True, 0.585, 1, None, 3, 2, None),
            ("formats/m-on-f.cns", False, False, 0, -1, 0, 2, 1, 1),
            ("formats/m-on-m.cns", False, True, 0, 0, 0, 2, 1, 1),
        )
        # (method, extra keyword arguments), in order:
        # call threshold, call clonal pure, call clonal barely-mixed.
        call_variants = (
            ("threshold", {}),
            ("clonal", {}),
            ("clonal", {"purity": 0.99}),
        )
        for (fname, sample_is_f, ref_is_m,
             chr1_expect, chrx_expect, chry_expect,
             chr1_cn, chrx_cn, chry_cn) in scenarios:
            cns = tabio.read_cna(fname)
            on_chr1 = (cns.chromosome == 'chr1')
            on_chrx = (cns.chromosome == 'chrX')
            on_chry = (cns.chromosome == 'chrY')

            # (row mask, expected mean 'cn', expected mean 'log2')
            expectations = [
                (on_chr1, chr1_cn, chr1_expect),
                (on_chrx, chrx_cn, chrx_expect),
            ]
            # chrY is only checked when the sample is not female
            if not sample_is_f:
                expectations.append((on_chry, chry_cn, chry_expect))

            for method, extra in call_variants:
                called = commands.do_call(cns,
                                          None,
                                          method,
                                          is_reference_male=ref_is_m,
                                          is_sample_female=sample_is_f,
                                          **extra)
                for mask, want_cn, want_log2 in expectations:
                    self.assertEqual(want_cn, called['cn'][mask].mean())
                    # log2 means are compared to 0 decimal places
                    self.assertAlmostEqual(want_log2,
                                           called['log2'][mask].mean(), 0)
Ejemplo n.º 12
0
 def test_call(self):
     """The 'call' command: output has one row per input segment."""
     # Every call below passes the same reference/sample sex flags.
     sex_flags = {"is_reference_male": True, "is_sample_female": True}
     # Methods: clonal, threshold, none
     tr_cns = tabio.read_cna("formats/tr95t.cns")
     tr_thresh = commands.do_call(tr_cns, None, "threshold", **sex_flags)
     self.assertEqual(len(tr_cns), len(tr_thresh))
     tr_clonal = commands.do_call(tr_cns, None, "clonal", purity=.65,
                                  **sex_flags)
     self.assertEqual(len(tr_cns), len(tr_clonal))
     cl_cns = tabio.read_cna("formats/cl_seq.cns")
     # Explicit thresholds: log2 of (k + 0.5) / 6 for k = 0..11
     custom_thresholds = np.log2((np.arange(12) + .5) / 6.)
     cl_thresh = commands.do_call(cl_cns, None, "threshold",
                                  thresholds=custom_thresholds,
                                  **sex_flags)
     self.assertEqual(len(cl_cns), len(cl_thresh))
     # 'clonal' and 'none' are run with identical ploidy and purity
     for method in ("clonal", "none"):
         cl_called = commands.do_call(cl_cns, None, method, ploidy=6,
                                      purity=.99, **sex_flags)
         self.assertEqual(len(cl_cns), len(cl_called))
Ejemplo n.º 13
0
 def test_segment(self):
     """The 'segment' command."""
     bins = tabio.read_cna("formats/amplicon.cnr")
     # R methods are in another script
     # Default options first, then an explicit threshold with skip_low on.
     for options in ({}, {"threshold": .001, "skip_low": True}):
         result = segmentation.do_segmentation(bins, "haar", **options)
         self.assertGreater(len(result), 0)
Ejemplo n.º 14
0
 def test_segment(self):
     """The 'segment' command."""
     bins = tabio.read_cna("formats/amplicon.cnr")

     def haar_segment_count(**options):
         # Number of segments the "haar" method returns for these options
         return len(segmentation.do_segmentation(bins, "haar", **options))

     # NB: R methods are in another script; haar is pure-Python
     self.assertGreater(haar_segment_count(), 0)
     self.assertGreater(
         haar_segment_count(threshold=.0001, skip_low=True), 0)
     # Segmentation that is additionally given variants read from a VCF
     variants = tabio.read("formats/na12878_na12882_mix.vcf", "vcf")
     self.assertGreater(haar_segment_count(variants=variants), 0)
Ejemplo n.º 15
0
 def test_fix(self):
     """The 'fix' command."""
     # Extract fake target/antitarget bins from a combined file
     reference = tabio.read_cna('formats/reference-tr.cnn')
     background_mask = (reference["gene"] == "Background")
     targets = reference[~background_mask]
     target_noise = np.random.randn(len(targets)) / 5
     targets.log2 += target_noise
     antitargets = reference[background_mask]
     antitarget_noise = np.random.randn(len(antitargets)) / 5
     antitargets.log2 += antitarget_noise
     no_antitargets = cnary.CopyNumArray([])
     # Typical usage (hybrid capture)
     fixed = commands.do_fix(targets, antitargets, reference)
     self.assertTrue(0 < len(fixed) <= len(reference))
     # Blank antitargets (WGS or amplicon): the reference is re-sliced to
     # its non-background rows rather than reusing the noised targets
     target_only_reference = reference[~background_mask]
     fixed = commands.do_fix(targets, no_antitargets, target_only_reference)
     self.assertTrue(0 < len(fixed) <= len(targets))
Ejemplo n.º 16
0
 def test_guess_xx(self):
     """Guess chromosomal sex from chrX log2 ratio value."""
     # Each entry: (file name, sample_is_f, ref_is_m)
     known_cases = (
         ("formats/f-on-f.cns", True, False),
         ("formats/f-on-m.cns", True, True),
         ("formats/m-on-f.cns", False, False),
         ("formats/m-on-m.cns", False, True),
         ("formats/amplicon.cnr", False, True),
         ("formats/cl_seq.cns", True, True),
         ("formats/tr95t.cns", True, True),
         ("formats/reference-tr.cnn", False, False),
     )
     for fname, sample_is_f, ref_is_m in known_cases:
         cnarr = tabio.read_cna(fname)
         guess = cnarr.guess_xx(ref_is_m)
         # Shown only when the guess disagrees with the known value
         failure_note = ("%s: guessed XX %s but is %s"
                         % (fname, guess, sample_is_f))
         self.assertEqual(guess, sample_is_f, failure_note)
Ejemplo n.º 17
0
 def test_gender(self):
     """Guess chromosomal gender from chrX log2 ratio value.

     Each case is (file name, sample_is_f, ref_is_m); the result of
     guess_xx(ref_is_m) on the file must equal sample_is_f.
     """
     for (fname, sample_is_f, ref_is_m) in (
         ("formats/f-on-f.cns", True, False),
         ("formats/f-on-m.cns", True, True),
         ("formats/m-on-f.cns", False, False),
         ("formats/m-on-m.cns", False, True),
         ("formats/amplicon.cnr", False, True),
         ("formats/cl_seq.cns", True, True),
         ("formats/tr95t.cns", True, True),
         ("formats/reference-tr.cnn", False, False),
     ):
         cnarr = tabio.read_cna(fname)
         # Guess once, and carry the failing case in the assertion message
         # instead of printing it, so the context appears in the test
         # report next to the failure itself.
         guess = cnarr.guess_xx(ref_is_m)
         self.assertEqual(
             sample_is_f, guess,
             "Gender issue: %s (sample_is_f=%s, ref_is_m=%s, guessed %s)"
             % (fname, sample_is_f, ref_is_m, guess))
Ejemplo n.º 18
0
 def test_center_all(self):
     """Test recentering."""

     def median_log2(arr):
         return np.median(arr['log2'])

     whole = tabio.read_cna('formats/reference-tr.cnn')
     # Median-centering an already median-centered array -> no change
     chr1 = whole.in_range('chr1')
     self.assertAlmostEqual(0, median_log2(chr1), places=1)
     chr1.center_all()
     baseline = median_log2(chr1)
     self.assertAlmostEqual(0, baseline)
     # Median-centering resets a shift away from the median
     shifted = chr1.copy()
     shifted['log2'] += 2.0
     shifted.center_all()
     self.assertAlmostEqual(median_log2(shifted), baseline)
     # Other methods for centering are similar for a CN-neutral chromosome
     for method in ("mean", "mode", "biweight"):
         recentered = chr1.copy()
         recentered.center_all(method)
         drift = abs(recentered['log2'].median() - baseline)
         self.assertLess(drift, 0.1)
Ejemplo n.º 19
0
 def test_breaks(self):
     """Check that the 'breaks' command reports at least one row."""
     bins = tabio.read_cna("formats/amplicon.cnr")
     segments = tabio.read_cna("formats/amplicon.cns")
     break_rows = commands.do_breaks(bins, segments, 4)
     self.assertGreater(len(break_rows), 0)
Ejemplo n.º 20
0
 def setUp(self):
     """Load the example reference file into ``self.ex_cnr``."""
     example_path = 'formats/reference-tr.cnn'
     self.ex_cnr = tabio.read_cna(example_path)
Ejemplo n.º 21
0
 def test_import_theta(self):
     """The 'import-theta' command."""
     segments = tabio.read_cna("formats/nv3.cns")
     results_path = "formats/nv3.n3.results"
     # Each imported array is non-empty and no longer than the input
     for imported in commands.do_import_theta(segments, results_path):
         n_imported = len(imported)
         self.assertTrue(0 < n_imported <= len(segments))
Ejemplo n.º 22
0
 def test_empty(self):
     """Reading an empty file gives an array of length zero."""
     empty_arr = tabio.read_cna("formats/empty")
     n_rows = len(empty_arr)
     self.assertEqual(n_rows, 0)