Ejemplo n.º 1
0
 def test_batch(self):
     """Exercise the 'batch' helpers: two reference builds, then one sample run.

     A WGS reference and a hybrid-capture reference are each built from the
     same single BAM; the hybrid reference is then used to process that BAM.
     """
     bam_path = "formats/na12878-chrM-Y-trunc.bam"
     fasta_path = "formats/chrM-Y-trunc.hg19.fa"
     annot_path = "formats/my-refflat.bed"
     targets_path = "formats/my-targets.bed"
     out_dir = 'build'
     expected_ref = 'build/reference.cnn'

     # WGS reference: the third returned filename is not needed here.
     wgs_ref, wgs_targets, _ = batch.batch_make_reference(
         [bam_path], None, None, True, fasta_path, annot_path, True, 500,
         None, None, None, None, out_dir, 1, False, "wgs")
     self.assertEqual(wgs_ref, expected_ref)
     # The reference should have as many rows as the target regions file.
     self.assertEqual(len(cnvlib.read(wgs_ref, 'bed')),
                      len(tabio.read(wgs_targets, 'bed')))

     # Hybrid-capture reference: target and antitarget files both matter.
     hyb_ref, hyb_targets, hyb_antitargets = batch.batch_make_reference(
         [bam_path], targets_path, None, True, fasta_path, None, True, 10,
         None, 1000, 100, None, out_dir, 1, False, "hybrid")
     self.assertEqual(hyb_ref, expected_ref)
     n_ref_bins = len(cnvlib.read(hyb_ref, 'bed'))
     n_targets = len(tabio.read(hyb_targets, 'bed'))
     n_antitargets = len(tabio.read(hyb_antitargets, 'bed'))
     self.assertEqual(n_ref_bins, n_targets + n_antitargets)

     # Process the same BAM against the hybrid reference just built.
     batch.batch_run_sample(
         bam_path, hyb_targets, hyb_antitargets, hyb_ref, out_dir, True,
         True, True, None, False, False, "hybrid", 1)
     segments = cnvlib.read("build/na12878-chrM-Y-trunc.cns")
     self.assertGreater(len(segments), 0)
Ejemplo n.º 2
0
 def test_read_vcf(self):
     """Read VCF input: a paired-sample file, then two files with no records."""
     vcf_path = "formats/na12878_na12882_mix.vcf"

     # Paired VCF with full info: some, but not every, line is a record.
     all_variants = tabio.read(vcf_path, "vcf")
     n_all = len(all_variants)
     self.assertLess(n_all, linecount(vcf_path))
     self.assertLess(0, n_all)

     # Selecting either sample by ID should leave the record count unchanged.
     for sample in ("NA12882", "NA12878"):
         by_sample = tabio.read(vcf_path, "vcf", sample_id=sample)
         self.assertEqual(by_sample.sample_id, sample)
         self.assertEqual(n_all, len(by_sample))

     # Each filter option alone should remove some records but not all.
     filter_options = (
         {'min_depth': 100},
         {'skip_somatic': True},
         {'skip_reject': True},
     )
     for option in filter_options:
         filtered = tabio.read(vcf_path, "vcf", **option)
         n_filtered = len(filtered)
         self.assertLess(n_filtered, n_all)
         self.assertLess(0, n_filtered,
                         "%d variants left after filter %r"
                         % (n_filtered, list(option)[0]))

     # VCF header, no samples, no records
     no_sample = tabio.read('formats/nosample.vcf', 'vcf')
     self.assertEqual(len(no_sample), 0)
     self.assertEqual(no_sample.sample_id, 'nosample')

     # VCF with 1 sample, no records
     blank = tabio.read('formats/blank.vcf', 'vcf', sample_id='Blank')
     self.assertEqual(len(blank), 0)
     self.assertEqual(blank.sample_id, 'Blank')
Ejemplo n.º 3
0
 def test_autobin(self):
     """Run 'autobin' once per method and check both fitted values are positive."""
     bam_path = "formats/na12878-chrM-Y-trunc.bam"
     target_regions = tabio.read("formats/my-targets.bed", 'bed')
     # Only the chrY rows of the accessible-regions file are passed along.
     all_access = tabio.read("../data/access-5k-mappable.hg19.bed", 'bed')
     chry_access = all_access.filter(chromosome='chrY')
     methods = ('amplicon', 'wgs', 'hybrid')
     for method in methods:
         first_fit, _ = autobin.do_autobin(bam_path, method,
                                           targets=target_regions,
                                           access=chry_access)
         # The first returned item is a pair of values.
         coverage, bin_size = first_fit
         self.assertGreater(coverage, 0)
         self.assertGreater(bin_size, 0)
Ejemplo n.º 4
0
 def test_resize_ranges(self):
     """Resized bins must stay between 0 and the end of their chromosome."""
     chrom_sizes = {
         'chr1': 249250621,
         'chr2': 243199373,
         'chr3': 198022430,
         'chr4': 191154276,
         'chr5': 180915260,
         'chr6': 171115067,
         'chr7': 159138663,
         'chr8': 146364022,
         'chr9': 141213431,
         'chr10': 135534747,
         'chr11': 135006516,
         'chr12': 133851895,
         'chr13': 115169878,
         'chr14': 107349540,
         'chr15': 102531392,
         'chr16': 90354753,
         'chr17': 81195210,
         'chr18': 78077248,
         'chr19': 59128983,
         'chr20': 63025520,
         'chr21': 48129895,
         'chr22': 51304566,
         'chrX': 155270560,
         'chrY': 59373566,
     }
     baits = tabio.read('formats/nv2_baits.interval_list', 'interval')
     resized = baits.resize_ranges(1e7, chrom_sizes)
     for chrom_name, chrom_bins in resized.by_chromosome():
         # Lower bound: no start coordinate below zero.
         self.assertLessEqual(0, chrom_bins.start.min())
         # Upper bound: no end coordinate past the chromosome's size.
         self.assertLessEqual(chrom_bins.end.max(), chrom_sizes[chrom_name])
Ejemplo n.º 5
0
 def test_read_vcf(self):
     """Read a paired-sample VCF with and without sample and filter options."""
     vcf_path = "formats/na12878_na12882_mix.vcf"
     all_variants = tabio.read(vcf_path, "vcf")
     n_all = len(all_variants)
     # Some, but not every, line of the file should become a record.
     self.assertLess(n_all, linecount(vcf_path))
     self.assertLess(0, n_all)

     # Selecting either sample by ID should leave the record count unchanged.
     for sample in ("NA12882", "NA12878"):
         by_sample = tabio.read(vcf_path, "vcf", sample_id=sample)
         self.assertEqual(by_sample.sample_id, sample)
         self.assertEqual(n_all, len(by_sample))

     # Each filter option alone should remove some records but not all.
     filter_options = (
         {'min_depth': 100},
         {'skip_somatic': True},
         {'skip_reject': True},
     )
     for option in filter_options:
         n_filtered = len(tabio.read(vcf_path, "vcf", **option))
         self.assertLess(n_filtered, n_all)
         self.assertLess(0, n_filtered)
Ejemplo n.º 6
0
 def test_segment(self):
     """Run 'segment' with the haar method under three argument sets."""
     ratios = cnvlib.read("formats/amplicon.cnr")
     # NB: R methods are in another script; haar is pure-Python
     method = "haar"

     # Default settings
     default_segs = segmentation.do_segmentation(ratios, method)
     self.assertGreater(len(default_segs), 0)

     # Explicit threshold plus skip_low
     strict_segs = segmentation.do_segmentation(
         ratios, method, threshold=0.0001, skip_low=True)
     self.assertGreater(len(strict_segs), 0)

     # With variants supplied from a VCF
     variants = tabio.read("formats/na12878_na12882_mix.vcf", "vcf")
     variant_segs = segmentation.do_segmentation(
         ratios, method, variants=variants)
     self.assertGreater(len(variant_segs), 0)
Ejemplo n.º 7
0
 def test_call_filter(self):
     """Run 'call' with several filter combinations and check the output size."""
     segments = cnvlib.read("formats/tr95t.segmetrics.cns")
     variants = tabio.read("formats/na12878_na12882_mix.vcf", "vcf")
     # Single filters first, then two three-filter combinations
     filter_sets = (
         ['ampdel'],
         ['cn'],
         ['ci'],
         ['sem'],
         ['sem', 'cn', 'ampdel'],
         ['ci', 'cn', 'ampdel'],
     )
     expected_columns = ('baf', 'cn', 'cn1', 'cn2')
     for active_filters in filter_sets:
         called = commands.do_call(
             segments, variants,
             method="threshold",
             purity=.9,
             is_reference_male=True,
             is_sample_female=True,
             filters=active_filters)
         # The result may not have more rows than the input...
         self.assertLessEqual(len(called), len(segments))
         # ...nor fewer rows than the input has distinct chromosomes.
         self.assertLessEqual(len(segments.chromosome.unique()), len(called))
         for column in expected_columns:
             self.assertIn(column, called)
Ejemplo n.º 8
0
 def test_empty(self):
     """Reading an empty file should give an object of length zero."""
     empty_path = "formats/empty"
     n_rows = len(tabio.read(empty_path))
     self.assertEqual(n_rows, 0)
Ejemplo n.º 9
0
# Command-line interface; the module docstring doubles as the description.
AP = argparse.ArgumentParser(description=__doc__)
AP.add_argument('refflat', help="UCSC refFlat.txt for the reference genome.")
AP.add_argument('-e',
                '--exons',
                action='store_true',
                help="""Emit each exon instead of the whole gene regions.""")
AP.add_argument('-f',
                '--flatten',
                action='store_true',
                help="""Flatten overlapping regions, keeping original
                boundaries. Not recommended with --exons.""")
# nargs='?' with const=1: a bare "-m" yields 1, "-m N" yields N, and omitting
# the option leaves args.merge as None.
AP.add_argument('-m',
                '--merge',
                metavar='BASEPAIRS',
                nargs='?',
                type=int,
                const=1,
                help="""Merge overlapping regions with different names.
                Recommended with --exons. Optional argument value is the
                number of overlapping bases between two regions to trigger a
                merge. [Default: %(const)s]""")
AP.add_argument('-o', '--output', help="Output filename.")
args = AP.parse_args()

# Load the refFlat annotation, as whole genes or as exons per --exons.
regions = tabio.read(args.refflat, 'refflat', exons=args.exons)
if args.flatten:
    # --flatten takes precedence; --merge is ignored when both are given.
    regions = regions.flatten()
elif args.merge is not None:
    # Test against None rather than truthiness: an explicit "--merge 0"
    # parses to the integer 0, which a plain truth test would treat the same
    # as the option being absent and silently skip the merge.
    regions = regions.merge(bp=args.merge)
# Write the result as 4-column BED to the --output filename.
tabio.write(regions, args.output, 'bed4')
Ejemplo n.º 10
0
 def test_read_text(self):
     """Parse a text-format regions file: one row per line of the file."""
     text_path = "formats/amplicon.text"
     parsed = tabio.read(text_path, "text")
     n_rows = len(parsed)
     self.assertEqual(n_rows, linecount(text_path))
     # The expected sample ID matches the file's base name.
     self.assertEqual(parsed.sample_id, "amplicon")
Ejemplo n.º 11
0
 def test_read_picardhs(self):
     """Parse a Picard CalculateHsMetrics PER_TARGET_COVERAGE file."""
     picard_path = "picard/p2-5_5.antitargetcoverage.csv"
     coverage = tabio.read(picard_path, "picardhs")
     # One line of the file is not a data row (presumably the header).
     n_rows = len(coverage)
     self.assertEqual(n_rows, linecount(picard_path) - 1)
     self.assertEqual(coverage.sample_id, "p2-5_5")
Ejemplo n.º 12
0
 def test_read_ilist(self):
     """Parse an interval list file and check its row count and sample ID."""
     ilist_path = "formats/nv2_baits.interval_list"
     baits = tabio.read(ilist_path, "interval")
     n_rows = len(baits)
     self.assertEqual(n_rows, 6809)
     self.assertEqual(baits.sample_id, "nv2_baits")
Ejemplo n.º 13
0
 def test_read_bed(self):
     """Parse a BED file: one row per line of the file."""
     bed_path = "formats/amplicon.bed"
     parsed = tabio.read(bed_path, "bed")
     n_rows = len(parsed)
     self.assertEqual(n_rows, linecount(bed_path))
     # The expected sample ID matches the file's base name.
     self.assertEqual(parsed.sample_id, "amplicon")
Ejemplo n.º 14
0
 def test_empty(self):
     """An empty file should read as zero rows under each listed format."""
     empty_path = "formats/empty"
     formats = ("auto", "tab", "bed", "interval", "text")
     for fmt_name in formats:
         parsed = tabio.read(empty_path, fmt=fmt_name)
         n_rows = len(parsed)
         self.assertEqual(n_rows, 0)
Ejemplo n.º 15
0
 def test_read_refflat(self):
     """Parse a UCSC 'refFlat' file: one row per line, 13 chromosomes."""
     refflat_path = "formats/refflat-mini.txt"
     genes = tabio.read(refflat_path, 'refflat')
     n_rows = len(genes)
     self.assertEqual(n_rows, linecount(refflat_path))
     n_chroms = genes.chromosome.nunique()
     self.assertEqual(13, n_chroms)
Ejemplo n.º 16
0
 def test_read_gff(self):
     """Parse a GFF file and check its row count and sample ID."""
     gff_path = 'formats/example.gff'
     features = tabio.read(gff_path, 'gff')
     # Two lines of the file are not counted as rows.
     n_rows = len(features)
     self.assertEqual(n_rows, linecount(gff_path) - 2)
     self.assertEqual(features.sample_id, 'example')