def test_targets(self):
    """Check the range of nearest-target distances reported by compute_basic_stats.

    Runs ``compute_basic_stats`` on ``namesort_test.bam`` with target regions
    loaded from ``agilent_kinome_targs.bed`` and asserts that the smallest and
    largest keys of the ``nearest_targ_dists`` summarizer selected by ``60``
    are 130 and 10000 respectively.
    """
    bam_bc_file = tk_test.in_path("namesort_test.bam")
    read_info_out = tk_test.out_path("read_info.h5")
    barcode_whitelist = bc_utils.load_barcode_whitelist("737K-april-2014")
    targets_filename = tk_test.in_path('agilent_kinome_targs.bed')

    # The BED handle is closed deterministically once the stats are computed.
    # The stats call stays inside the ``with`` block so that the handle is
    # still open even if get_target_regions reads it lazily.
    with open(targets_filename, 'r') as targets_file:
        target_regions = tk_io.get_target_regions(targets_file)
        bam_in = tk_bam.create_bam_infile(bam_bc_file)
        r = compute_basic_stats(bam_in, target_regions, 1000, bam_in.references,
                                barcode_whitelist=barcode_whitelist,
                                read_h5_out=read_info_out)

    # compute_basic_stats returns a 2-tuple here; unpacking also checks that shape.
    misc_sm, bc_sms = r

    nearest_targ_dists = bc_sms.get('nearest_targ_dists')
    # NOTE(review): 60 is presumably a MAPQ cutoff selecting the summarizer -- confirm.
    observed_dists = nearest_targ_dists.get_summarizer(60).dict.keys()
    max_target_dist = max(observed_dists)
    min_target_dist = min(observed_dists)

    self.assertEqual(min_target_dist, 130)
    self.assertEqual(max_target_dist, 10000)
def setUp(self):
    """Create the Phaser fixture (``self.p``) and record the barcode mix prior."""
    # The returned path is not used below; the call is kept as-is because
    # whether out_path has side effects is not visible from here.
    fragment_phasing = tk_test.out_path("phasing/fragment_phasing.tsv")

    variant_reader = VariantFileReader(SNP_INPUT_VCF)
    self.mix_prior = 0.0001

    phaser_options = {
        'bc_mix_prob': self.mix_prior,
        'min_junction_hap_conf': 0.99,
        'min_var_hap_conf': 0.99,
        'hap_block_size': 5,
    }
    self.p = Phaser(variant_reader, CONTIG_FILE, 'chr1', 0, 10, **phaser_options)
def _test_summary_metrics(self):
    """Check summary metrics and the per-read HDF5 table from compute_basic_stats.

    The leading underscore keeps this method out of unittest's default
    ``test*`` collection. It expects ``compute_basic_stats`` to return six values.
    """
    h5_path = tk_test.out_path("read_info.h5")
    stats = compute_basic_stats(self.bam_in, {}, read_h5_out=h5_path)

    # Only the third of the six returned items is inspected.
    (_insert_size_dists, _nearest_targ_dists, metrics,
     _bc_table, _mapq_counts, _insert_size_hist) = stats

    expected_metrics = (
        ("mapped_bases", 6),
        ("mean_dup_rate", 1.0),
        ("num_reads", 6),
        ("total_bases", 12),
    )
    for metric_name, expected_value in expected_metrics:
        self.assertEqual(metrics[metric_name], expected_value)

    read_frame = tenkit.hdf5.read_data_frame(h5_path)
    self.assertEqual(read_frame.shape[0], 3)
def test_barcode_counts(self):
    """Check that the per-read HDF5 table has one row per read pair.

    Runs ``compute_basic_stats`` on ``attach_bcs/attach_bcs_output.bam`` with
    no target regions, then compares the row count of the ``read_info.h5``
    table with half the number of records in the input BAM.
    """
    bam_bc_file = tk_test.in_path("attach_bcs/attach_bcs_output.bam")
    read_info_out = tk_test.out_path("read_info.h5")
    barcode_whitelist = bc_utils.load_barcode_whitelist("737K-april-2014")

    bam_in = tk_bam.create_bam_infile(bam_bc_file)
    r = compute_basic_stats(bam_in, {}, 2000, bam_in.references,
                            barcode_whitelist=barcode_whitelist,
                            read_h5_out=read_info_out)

    # compute_basic_stats returns a 2-tuple here; unpacking also checks that shape.
    misc_sm, bc_sms = r

    # Count records by streaming a fresh handle instead of building a list
    # of every read just to take its length.
    n_reads = sum(1 for _ in tk_bam.create_bam_infile(bam_bc_file))

    # Load the per-cluster table -- there should be a row for each read pair.
    # Floor division keeps the expected count an integer on Python 2 and 3 alike.
    read_info = tenkit.hdf5.read_data_frame(read_info_out)
    self.assertEqual(read_info.shape[0], n_reads // 2)
# # Copyright (c) 2014 10X Genomics, Inc. All rights reserved. # import os import tenkit.test as tk_test import tenkit.fasta as tk_fasta import martian from .. import * martian.test_initialize(tk_test.out_path("")) IN_PREFIX = tk_test.in_path('combine_and_trim_reads') IN_RA_ALL = [ tk_test.in_path( 'combine_and_trim_reads/read-RA_si-AAAA_lane-1_chunk-1.fastq'), tk_test.in_path( 'combine_and_trim_reads/read-RA_si-AAAN_lane-1_chunk-1.fastq'), tk_test.in_path( 'combine_and_trim_reads/read-RA_si-CCCC_lane-1_chunk-1.fastq') ] class TestFunctions(tk_test.UnitTestBase): def test_setup_chunks(self): args = martian.Record({ 'input_mode': 'BCL_PROCESSOR', 'sample_def': [{ 'read_path': IN_PREFIX, 'sample_indices': ["AAAA", "CCCC"],
# # Copyright (c) 2014 10X Genomics, Inc. All rights reserved. # # Test attach bcs import pysam import tenkit.test as tk_test import tenkit.fasta as tk_fasta from .. import * import martian IN_FASTQ = tk_test.in_path("test_bwa.fastq") OUT_BAM = tk_test.out_path("test_aligner.bam") class TestFunctions(tk_test.UnitTestBase): def setUp(self): pass def test_align(self): args = { 'chunk_input': IN_FASTQ, 'aligner': 'bwa', 'aligner_method': 'MEM', 'reference_path': 'hg19', '__threads': 1, 'reads_interleaved': True } outs = {'default': OUT_BAM}
import crdna.bio_io as crdna_io import pysam import itertools import numpy as np import tenkit.constants import crdna.read_filter import martian martian.test_initialize("") # Patrick Marks # Simple test of deduper IN_BAM = tk_test.in_path('test_analyze_bias.bam') OUT_BAM = tk_test.out_path('test_dedup_out.bam') OUT_JSON = tk_test.out_path( 'test_dedup_summary.json') IN_BAM_BIG = tk_test.in_path("test_mark_duplicates.bam") class TestFunctions(tk_test.UnitTestBase): def setUp(self): pass def test_dedup(self): tenkit.constants.DUPLICATE_SUBSAMPLE_COVERAGES = [0.00001, 0.0001] args = martian.Record({ 'input': IN_BAM, 'estimated_coverage': 100.0, 'perfect_read_count': 1000, 'chunk_start':None, 'chunk_end':None }) outs = martian.Record({ 'output': OUT_BAM, 'duplicate_summary': OUT_JSON }) main_mark_duplicates(args, outs) out_bam = pysam.Samfile(OUT_BAM) dups = [ x.is_duplicate for x in out_bam ]
# Test attach bcs import pysam from itertools import groupby import tenkit.test as tk_test import tenkit.seq as tk_seq from .. import * from tenkit.constants import PROCESSED_BARCODE_TAG, RAW_BARCODE_TAG, SAMPLE_INDEX_TAG import martian IN_BAM = tk_test.in_path('attach_bcs/alignment_with_secondary.bam') IN_I1 = tk_test.in_path('attach_bcs/phix_I1.fastq') IN_I2 = tk_test.in_path('attach_bcs/phix_I2.fastq') IN_WHITELIST = '737K-april-2014' OUT_BAM = tk_test.out_path('test_attach_bcs.bam') class TestFunctions(tk_test.UnitTestBase): def setUp(self): pass def test_attach_bcs(self): # --align_input alignment_output.bam --barcode_input phix_I2.fastq --output test2.out --complete ~/c --stats ~/s args = { 'barcode_whitelist' : IN_WHITELIST, 'align_chunk' : IN_BAM, 'barcode_chunk' : IN_I2, 'sample_index_chunk' : IN_I1, 'gem_group' : None, 'paired_end' : True, 'exclude_non_bc_reads' : False,
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved. # # Code for testing phaser.py # import scipy.sparse import math import tenkit.test as tk_test import tenkit.bio_io as tk_io from tenkit.bio_io import VariantFileReader, VariantFileWriter from ..phaser import * CONTIG_FILE = tk_test.in_path('phasing/small_fragments.h5') SNP_INPUT_VCF = tk_test.in_path('test_phasing_snps_sorted.vcf.gz') INDEL_INPUT_VCF = tk_test.in_path('test_phasing_indels_sorted.vcf.gz') OUTPUT_VCF = tk_test.out_path('test_phasing_output.vcf') OUTPUT_TSV = tk_test.out_path("test_bc_hap_out.tsv") import martian martian.test_initialize(tk_test.out_path("")) class TestPhaserBig(tk_test.UnitTestBase): def setUp(self): contig_file = tk_test.in_path("phasing/big_fragments.h5") snp_vfr = VariantFileReader(tk_test.in_path("phasing/default.vcf.gz")) self.p = Phaser(snp_vfr, contig_file, 'chr21', 10000000, 10500000) def test_calc_hap1_hap2_log_prob(self): # Generate a random assignment
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
import tenkit.test as tk_test
from .. import *
import pysam
import martian

martian.test_initialize("")

# Patrick Marks
# Smoke test: run the stage's main() on a gzipped FASTQ chunk, requesting BAM output.

IN_FASTQ = tk_test.in_path('test_bc_sorted_big_fastq.fastq.gz')
OUT_BAM = tk_test.out_path('test_unaligned_out.bam')


class TestFunctions(tk_test.UnitTestBase):
    """Exercises ``main`` (pulled in by the package star-import above)."""

    def setUp(self):
        # Intentionally empty override.
        pass

    def test_make_unaligned(self):
        """Invoke main() with a minimal argument record; passes if nothing raises."""
        stage_args = martian.Record(dict([
            ('sample_id', 1234),
            ('output_format', "bam"),
            ('read_group', "RG"),
            ('read_chunk', IN_FASTQ),
        ]))
        stage_outs = martian.Record(dict([('barcoded_unaligned', OUT_BAM)]))
        main(stage_args, stage_outs)
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
import os.path
import tenkit.test as tk_test
from .. import *

IN_BAM = tk_test.in_path('test_attach_bc_vars.bam')
OUT_VCF = tk_test.out_path('test_call_variants.vcf')


class TestFunctions(tk_test.UnitTestBase):
    """Exercises ``main`` for variant calling on a small BAM."""

    def setUp(self):
        # Intentionally empty override.
        pass

    def test_call_variants(self):
        """Run main() in 'freebayes' mode on one locus and check that the VCF file exists."""
        # NOTE(review): `martian` is not imported explicitly in this file;
        # presumably it arrives via `from .. import *` -- confirm.
        stage_inputs = {
            'input': IN_BAM,
            'locus': "chr1:10000..20000",
            'reference_path': 'hg19',
            'targets_file': None,
            'restrict_locus': None,
            'coverage': None,
            'max_coverage': None,
            'variant_mode': 'freebayes',
        }
        stage_args = martian.Record(stage_inputs)
        stage_outs = martian.Record({'default': OUT_VCF})

        main(stage_args, stage_outs)

        vcf_was_written = os.path.exists(OUT_VCF)
        self.assertTrue(vcf_was_written)
def setUp(self):
    """Set the input/output directories for the lane-count tests.

    Sets ``self.input_dir`` and ``self.output_dir`` and makes sure the output
    directory exists. Because setUp runs before every test method, the
    directory may already be present from an earlier test or a previous run;
    that is tolerated instead of raising.

    Raises:
        OSError: if the output directory cannot be created for any reason
            other than it already existing as a directory.
    """
    # Function-scope import so this block does not depend on the file's import list.
    import errno

    super(TestLaneCount, self).setUp()
    self.input_dir = tk_test.in_path("lane")
    self.output_dir = tk_test.out_path("prepare_samplesheet")
    try:
        os.makedirs(self.output_dir)
    except OSError as err:
        # Only swallow "already exists", and only when it really is a directory.
        if err.errno != errno.EEXIST or not os.path.isdir(self.output_dir):
            raise
# # Copyright (c) 2014 10X Genomics, Inc. All rights reserved. # # Test attach bcs import pysam import tenkit.test as tk_test import tenkit.bio_io as tk_io from .. import * import martian IN_BAM = tk_test.in_path("attach_phasing/bam_test.bam") IN_FRAGS = tk_test.in_path("attach_phasing/fragments_test.tsv.gz") OUT_BAM = tk_test.out_path("test_attach_phasing.bam") class TestFunctions(tk_test.UnitTestBase): def setUp(self): pass def test_attach_phasing(self): args = martian.Record({ 'input': IN_BAM, 'fragment_phasing': IN_FRAGS, 'chunk_start': 0, 'chunk_end': 1 << 32 }) outs = martian.Record({ 'phased_possorted_bam': OUT_BAM,