コード例 #1
0
    def test_targets(self):
        """Check the range of nearest-target distances from compute_basic_stats.

        Runs compute_basic_stats on the name-sorted BAM fixture with the
        target regions loaded from the Agilent kinome BED fixture, then
        asserts the smallest and largest keys of the summarizer returned by
        ``get_summarizer(60)`` on the 'nearest_targ_dists' entry.
        """
        bam_bc_file = tk_test.in_path("namesort_test.bam")
        read_info_out = tk_test.out_path("read_info.h5")
        barcode_whitelist = bc_utils.load_barcode_whitelist("737K-april-2014")

        targets_filename = tk_test.in_path('agilent_kinome_targs.bed')
        # Close the BED handle once the regions are loaded instead of leaking
        # it for the remainder of the test run.
        # NOTE(review): assumes get_target_regions consumes the file eagerly -- confirm.
        with open(targets_filename, 'r') as targets_file:
            target_regions = tk_io.get_target_regions(targets_file)

        bam_in = tk_bam.create_bam_infile(bam_bc_file)
        r = compute_basic_stats(bam_in,
                                target_regions,
                                1000,
                                bam_in.references,
                                barcode_whitelist=barcode_whitelist,
                                read_h5_out=read_info_out)
        # The result is unpacked as a pair; only bc_sms is inspected below.
        misc_sm, bc_sms = r

        nearest_targ_dists = bc_sms.get('nearest_targ_dists')
        # Look the summarizer up once and reuse its keys for both extremes.
        observed_dists = nearest_targ_dists.get_summarizer(60).dict.keys()
        maxTargetDist = max(observed_dists)
        minTargetDist = min(observed_dists)

        self.assertEqual(minTargetDist, 130)
        self.assertEqual(maxTargetDist, 10000)
コード例 #2
0
    def test_barcode_counts(self):
        """Check that the read-info HDF5 table has one row per read pair.

        Runs compute_basic_stats on the attach_bcs output BAM with no target
        regions, counts the records in that BAM, and compares half that count
        with the number of rows in the table written to ``read_h5_out``.
        """
        bam_bc_file = tk_test.in_path("attach_bcs/attach_bcs_output.bam")
        read_info_out = tk_test.out_path("read_info.h5")
        barcode_whitelist = bc_utils.load_barcode_whitelist("737K-april-2014")
        bam_in = tk_bam.create_bam_infile(bam_bc_file)
        r = compute_basic_stats(bam_in, {},
                                2000,
                                bam_in.references,
                                barcode_whitelist=barcode_whitelist,
                                read_h5_out=read_info_out)
        # The result is unpacked as a pair; neither element is inspected here.
        misc_sm, bc_sms = r

        # Count the records by streaming a fresh BAM handle; a generator avoids
        # holding every read in memory just to take a length.
        # (A former check that the barcode table's raw counts summed to
        # n_reads / 2 is disabled.)
        n_reads = sum(1 for _ in tk_bam.create_bam_infile(bam_bc_file))

        # Load the per-cluster table -- there should be a row for each read pair
        read_info = tenkit.hdf5.read_data_frame(read_info_out)

        self.assertEqual(read_info.shape[0], n_reads / 2)
コード例 #3
0
    def test_target_finding(self):
        """Spot-check get_read_regions_dist against the first chr1 target region.

        Four intervals are tried relative to the first region in the chr1
        region list: one ending 10 before its start, one reaching past its
        start, one starting 2 past its end, and one straddling its end.
        """
        # Check a few targets by hand

        targets_filename = tk_test.in_path('agilent_kinome_targs.bed')
        # Close the BED handle once the regions are loaded instead of leaking it.
        # NOTE(review): assumes get_target_regions consumes the file eagerly -- confirm.
        with open(targets_filename, 'r') as targets_file:
            target_regions = tk_io.get_target_regions(targets_file)

        chr1_regions = target_regions['chr1']
        chr1_list = chr1_regions.get_region_list()
        test_reg = chr1_list[0]

        # Interval ends 10 short of the region start -> distance 10
        dist = get_read_regions_dist(0, test_reg.start - 10, chr1_regions)
        self.assertEqual(dist, 10)

        # Interval extends 10 past the region start -> distance 0
        dist = get_read_regions_dist(0, test_reg.start + 10, chr1_regions)
        self.assertEqual(dist, 0)

        # Interval begins 2 past the region end -> distance 2
        dist = get_read_regions_dist(test_reg.end + 2, test_reg.end + 3,
                                     chr1_regions)
        self.assertEqual(dist, 2)

        # Interval straddles the region end -> distance 0
        dist = get_read_regions_dist(test_reg.end - 2, test_reg.end + 3,
                                     chr1_regions)
        self.assertEqual(dist, 0)
コード例 #4
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
# Code for testing analyze_run.py
#

from pyfasta import Fasta
import tenkit.bam as tk_bam
import tenkit.test as tk_test
from tenkit.regions import Regions
from .. import *

## Input files
# BAM fixture opened by TestFunctions.setUp
bam_in_file = tk_test.in_path('test_analyze_bias.bam')
# FASTA fixture directory; the trailing slash matters because the test builds
# the FASTA path by string concatenation
fasta_dir = tk_test.in_path('fasta/')

class TestFunctions(tk_test.UnitTestBase):
    """Tests that call get_depth_info on the test_analyze_bias BAM fixture."""
    def setUp(self):
        """Open the BAM fixture before each test."""
        self.bam_in = tk_bam.create_bam_infile(bam_in_file)

    def test_get_depth_info(self):
        """Call get_depth_info on every read, then on the non-duplicate reads.

        NOTE(review): no assertions are visible in this excerpt; the results
        are only computed and unpacked.
        """
        ref_fasta = Fasta(fasta_dir + 'test/chr0.fa')
        chr0 = ref_fasta['chr0']
        confident_regions = Regions([(0,10000000)])

        # Materialize the reads so the same records can be used for both calls
        reads = list(self.bam_in)
        r = get_depth_info(reads, "chr0", 0, len(chr0), None, confident_regions)
        (depth_df, summary_depth_info, confident_depth_info, target_info, target_cov) = r

        # Same call restricted to reads whose is_duplicate flag is not set.
        # NOTE(review): filter() returns a list on Python 2 but a one-shot
        # iterator on Python 3.
        reads_dd = filter(lambda x: not x.is_duplicate, reads)
        r_dd = get_depth_info(reads_dd, "chr0", 0, len(chr0), None, confident_regions)
コード例 #5
0
from .. import *
import crdna.bio_io as crdna_io
import pysam
import itertools
import numpy as np
import tenkit.constants
import crdna.read_filter

import martian

# Runs at import time, before any test in this module
martian.test_initialize("")

# Patrick Marks
# Simple test of deduper

# Input BAM passed to main_mark_duplicates by test_dedup
IN_BAM = tk_test.in_path('test_analyze_bias.bam')
# Output BAM and JSON summary paths handed to the stage
OUT_BAM = tk_test.out_path('test_dedup_out.bam')
OUT_JSON = tk_test.out_path( 'test_dedup_summary.json')
# NOTE(review): not referenced in the visible part of this file
IN_BAM_BIG = tk_test.in_path("test_mark_duplicates.bam")

class TestFunctions(tk_test.UnitTestBase):
    """Exercises main_mark_duplicates on the IN_BAM fixture."""
    def setUp(self):
        """No per-test fixtures are needed."""
        pass

    def test_dedup(self):
        """Run main_mark_duplicates with IN_BAM as input, then open OUT_BAM."""
        # NOTE(review): this rebinds a module-level constant in tenkit.constants
        # and nothing visible here restores it afterwards.
        tenkit.constants.DUPLICATE_SUBSAMPLE_COVERAGES = [0.00001, 0.0001]
        args = martian.Record({ 'input': IN_BAM, 'estimated_coverage': 100.0, 'perfect_read_count': 1000, 'chunk_start':None, 'chunk_end':None })
        outs = martian.Record({ 'output': OUT_BAM, 'duplicate_summary': OUT_JSON })
        main_mark_duplicates(args, outs)

        # Open the output path for inspection
        out_bam = pysam.Samfile(OUT_BAM)
コード例 #6
0
ファイル: test_attach_bcs.py プロジェクト: umccr/longranger
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

# Test attach bcs
import pysam
from itertools import groupby
import tenkit.test as tk_test
import tenkit.seq as tk_seq
from .. import *
from tenkit.constants import PROCESSED_BARCODE_TAG, RAW_BARCODE_TAG, SAMPLE_INDEX_TAG

import martian

# Alignment fixture passed to the stage as 'align_chunk'
IN_BAM = tk_test.in_path('attach_bcs/alignment_with_secondary.bam')
# Index-read FASTQ fixtures; IN_I2 is passed as 'barcode_chunk'
# NOTE(review): IN_I1 is not referenced in the visible part of this file
IN_I1 = tk_test.in_path('attach_bcs/phix_I1.fastq')
IN_I2 = tk_test.in_path('attach_bcs/phix_I2.fastq')
# Whitelist name passed to the stage as 'barcode_whitelist'
IN_WHITELIST = '737K-april-2014'

# Output BAM path under the test output directory
OUT_BAM = tk_test.out_path('test_attach_bcs.bam')

class TestFunctions(tk_test.UnitTestBase):
    def setUp(self):
        pass

    def test_attach_bcs(self):
        #  --align_input alignment_output.bam --barcode_input phix_I2.fastq --output test2.out --complete ~/c --stats ~/s
        args = {
            'barcode_whitelist' : IN_WHITELIST,
            'align_chunk' : IN_BAM,
            'barcode_chunk' : IN_I2,
コード例 #7
0
 def setUp(self):
     """Create the Phaser under test and store it on ``self.p``.

     The Phaser is built from the big-fragments HDF5 fixture and a reader
     over the default VCF fixture, with 'chr21', 10000000 and 10500000 as
     the remaining arguments (presumably chromosome, start and end).
     """
     fragments_path = tk_test.in_path("phasing/big_fragments.h5")
     vcf_path = tk_test.in_path("phasing/default.vcf.gz")
     variant_reader = VariantFileReader(vcf_path)
     self.p = Phaser(variant_reader, fragments_path, 'chr21', 10000000, 10500000)
コード例 #8
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
# Code for testing phaser.py
#

import scipy.sparse
import math
import tenkit.test as tk_test
import tenkit.bio_io as tk_io
from tenkit.bio_io import VariantFileReader, VariantFileWriter
from ..phaser import *

# Input fixtures: fragment HDF5 plus SNP and indel VCFs
CONTIG_FILE = tk_test.in_path('phasing/small_fragments.h5')
SNP_INPUT_VCF = tk_test.in_path('test_phasing_snps_sorted.vcf.gz')
INDEL_INPUT_VCF = tk_test.in_path('test_phasing_indels_sorted.vcf.gz')
# Output paths under the test output directory
OUTPUT_VCF = tk_test.out_path('test_phasing_output.vcf')
OUTPUT_TSV = tk_test.out_path("test_bc_hap_out.tsv")

import martian

# Runs at import time; hands martian the path returned by out_path("")
martian.test_initialize(tk_test.out_path(""))


class TestPhaserBig(tk_test.UnitTestBase):
    def setUp(self):
        """Build a Phaser from the big-fragments fixture and keep it as ``self.p``.

        The variant reader wraps the default VCF fixture; 'chr21', 10000000
        and 10500000 are passed through to Phaser unchanged.
        """
        big_fragments = tk_test.in_path("phasing/big_fragments.h5")
        default_vcf = tk_test.in_path("phasing/default.vcf.gz")
        reader = VariantFileReader(default_vcf)
        self.p = Phaser(reader, big_fragments, 'chr21', 10000000, 10500000)

    def test_calc_hap1_hap2_log_prob(self):
コード例 #9
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
import tenkit.test as tk_test
from .. import *
import pysam

import martian

# Runs at import time, before any test in this module
martian.test_initialize("")

# Patrick Marks
# Simple test of unaligned BAM creation (see test_make_unaligned)

# FASTQ fixture passed to the stage as 'read_chunk'
IN_FASTQ = tk_test.in_path('test_bc_sorted_big_fastq.fastq.gz')
# Output path passed to the stage as 'barcoded_unaligned'
OUT_BAM = tk_test.out_path('test_unaligned_out.bam')


class TestFunctions(tk_test.UnitTestBase):
    """Smoke test that drives the stage's ``main`` with a FASTQ fixture."""

    def setUp(self):
        """No per-test fixtures are needed."""

    def test_make_unaligned(self):
        """Call ``main`` with IN_FASTQ as 'read_chunk' and OUT_BAM as the output."""
        stage_args = {
            'sample_id': 1234,
            'output_format': "bam",
            'read_group': "RG",
            'read_chunk': IN_FASTQ,
        }
        stage_outs = {'barcoded_unaligned': OUT_BAM}
        main(martian.Record(stage_args), martian.Record(stage_outs))
コード例 #10
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

# Test attach phasing
import pysam
import tenkit.test as tk_test
import tenkit.bio_io as tk_io
from .. import *

import martian

# BAM fixture passed to the stage as 'input'
IN_BAM = tk_test.in_path("attach_phasing/bam_test.bam")
# Fragment fixture passed to the stage as 'fragment_phasing'
IN_FRAGS = tk_test.in_path("attach_phasing/fragments_test.tsv.gz")
# Output path passed to the stage as 'phased_possorted_bam'
OUT_BAM = tk_test.out_path("test_attach_phasing.bam")


class TestFunctions(tk_test.UnitTestBase):
    def setUp(self):
        pass

    def test_attach_phasing(self):

        args = martian.Record({
            'input': IN_BAM,
            'fragment_phasing': IN_FRAGS,
            'chunk_start': 0,
            'chunk_end': 1 << 32
        })
        outs = martian.Record({
            'phased_possorted_bam': OUT_BAM,
コード例 #11
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
from .. import *
import tenkit.test as tk_test
import tenkit.seq as tk_seq
import tenkit.hdf5
import barcodes.utils as bc_utils

# BAM fixture opened by TestFunctions.setUp
bam_in_file = tk_test.in_path('test_map_rate.bam')


class TestFunctions(tk_test.UnitTestBase):
    """Summary-metric checks against the test_map_rate BAM fixture."""
    def setUp(self):
        """Open the BAM fixture before each test."""
        self.bam_in = tk_bam.create_bam_infile(bam_in_file)

    # The name starts with an underscore rather than "test", so (presumably)
    # the unittest loader does not collect this method.
    def _test_summary_metrics(self):
        """Compare selected summary metrics from compute_basic_stats with fixed values.

        NOTE(review): confirm that compute_basic_stats still returns six
        values before re-enabling this test.
        """

        read_info_out = tk_test.out_path("read_info.h5")

        insert_size_dists, nearest_targ_dists, summary_metrics, bc_table, mapq_counts, insert_size_hist = \
                compute_basic_stats(self.bam_in, {}, read_h5_out=read_info_out)

        summary = summary_metrics

        self.assertEqual(summary["mapped_bases"], 6)
        self.assertEqual(summary["mean_dup_rate"], 1.0)
        self.assertEqual(summary["num_reads"], 6)
        self.assertEqual(summary["total_bases"], 12)

        # Read back the table written to read_h5_out
        p = tenkit.hdf5.read_data_frame(read_info_out)
コード例 #12
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
import os.path
import tenkit.test as tk_test
from .. import *

# BAM fixture passed to the stage as 'input'
IN_BAM = tk_test.in_path('test_attach_bc_vars.bam')
# Output VCF path; the test asserts this file exists after main() returns
OUT_VCF = tk_test.out_path('test_call_variants.vcf')


class TestFunctions(tk_test.UnitTestBase):
    """Smoke test for the variant-calling stage's ``main`` entry point."""

    def setUp(self):
        """No per-test fixtures are needed."""

    def test_call_variants(self):
        """Run ``main`` in 'freebayes' mode on one locus and check OUT_VCF exists."""
        stage_args = martian.Record({
            'input': IN_BAM,
            'locus': "chr1:10000..20000",
            'reference_path': 'hg19',
            'targets_file': None,
            'restrict_locus': None,
            'coverage': None,
            'max_coverage': None,
            'variant_mode': 'freebayes',
        })
        stage_outs = martian.Record({'default': OUT_VCF})

        main(stage_args, stage_outs)

        # The only check is that the output file was created.
        vcf_written = os.path.exists(OUT_VCF)
        self.assertTrue(vcf_written)
コード例 #13
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

import scipy.sparse
import math
import tenkit.test as tk_test
from .. import *
import martian

# BAM fixture passed to get_discordant_loci
TEST_SPLIT_BAM = tk_test.in_path('test_split.bam')

# Runs at import time; hands martian the path returned by out_path("")
martian.test_initialize(tk_test.out_path(""))


class TestGetReadpairLoci(tk_test.UnitTestBase):
    """Tests for get_discordant_loci using the test_split BAM fixture."""

    def setUp(self):
        """No per-test fixtures are needed."""
        pass

    def test_get_discordant_loci(self):
        """Check the number of loci found in the fixture and the first locus.

        get_discordant_loci is called with min_insert=0, max_insert=300 and
        min_sv_len=300.
        """
        # Reads 3,4,7,11 are neither secondary nor read1 so they should never be considered.
        # Reads 5, 8, 9, 10 are split
        # Reads 1, 2, 6 are rp and read1

        loci = get_discordant_loci(TEST_SPLIT_BAM,
                                   min_insert=0,
                                   max_insert=300,
                                   min_sv_len=300)
        # Only reads 5,6,8,9,10 are included (1-based read indices)
        self.assertEqual(len(loci), 10)
        self.assertEqual(loci[0], ('chr20', 60173, 60773, (0, 1)))
コード例 #14
0
ファイル: __init__.py プロジェクト: umccr/longranger
 def setUp(self):
     """Run the base-class setUp, record the fixture paths, create the output dir.

     NOTE(review): os.makedirs raises if the directory already exists, so
     this relies on the output directory being absent before each test.
     """
     super(TestLaneCount, self).setUp()
     self.input_dir = tk_test.in_path("lane")
     samplesheet_dir = tk_test.out_path("prepare_samplesheet")
     self.output_dir = samplesheet_dir
     os.makedirs(samplesheet_dir)
コード例 #15
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

# Test aligner
import pysam
import tenkit.test as tk_test
import tenkit.fasta as tk_fasta
from .. import *

import martian

# FASTQ fixture used as 'chunk_input'
IN_FASTQ = tk_test.in_path("test_bwa.fastq")
# Output BAM path used as the 'default' output
OUT_BAM = tk_test.out_path("test_aligner.bam")


class TestFunctions(tk_test.UnitTestBase):
    """Test case for an alignment run configured with aligner 'bwa', method 'MEM'."""
    def setUp(self):
        """No per-test fixtures are needed."""
        pass

    def test_align(self):
        """Build the argument and output dicts for an alignment run.

        NOTE(review): this excerpt ends after the dicts are built; no call
        that consumes them is visible.
        """
        args = {
            'chunk_input': IN_FASTQ,
            'aligner': 'bwa',
            'aligner_method': 'MEM',
            'reference_path': 'hg19',
            '__threads': 1,
            'reads_interleaved': True
        }
        outs = {'default': OUT_BAM}
コード例 #16
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
import os
import tenkit.test as tk_test
import tenkit.fasta as tk_fasta
import martian
from .. import *

# Runs at import time; hands martian the path returned by out_path("")
martian.test_initialize(tk_test.out_path(""))

# Fixture directory passed to the stage as 'read_path'
IN_PREFIX = tk_test.in_path('combine_and_trim_reads')
# RA FASTQ fixtures for sample indices AAAA, AAAN and CCCC (lane 1, chunk 1)
IN_RA_ALL = [
    tk_test.in_path(
        'combine_and_trim_reads/read-RA_si-AAAA_lane-1_chunk-1.fastq'),
    tk_test.in_path(
        'combine_and_trim_reads/read-RA_si-AAAN_lane-1_chunk-1.fastq'),
    tk_test.in_path(
        'combine_and_trim_reads/read-RA_si-CCCC_lane-1_chunk-1.fastq')
]


class TestFunctions(tk_test.UnitTestBase):
    def test_setup_chunks(self):

        args = martian.Record({
            'input_mode':
            'BCL_PROCESSOR',
            'sample_def': [{
                'read_path': IN_PREFIX,
                'sample_indices': ["AAAA", "CCCC"],
コード例 #17
0
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

import scipy.sparse
import math
import tenkit.test as tk_test
import tenkit.bio_io as tk_io
from .. import *
import martian

# Fixture paths.
# NOTE(review): neither is referenced in the visible part of this file.
TEST_BAM = tk_test.in_path('test_count_reads.bam')
TEST_TARGETS = tk_test.in_path('test_targets_for_counting.txt')

# Runs at import time; hands martian the path returned by out_path("")
martian.test_initialize(tk_test.out_path(""))

class TestCountReadsBcs(tk_test.UnitTestBase):
    """Tests for get_non_overlapping_wins."""

    def setUp(self):
        """No per-test fixtures are needed."""
        pass

    def test_get_non_overlapping_wins(self):
        """Check which window indices get_non_overlapping_wins selects.

        Uses the TestCase assertion methods rather than bare ``assert`` so the
        checks still run under ``python -O`` and report the mismatching values.
        """
        # Six windows where each start equals the previous stop:
        # every index is expected to be selected.
        starts = np.arange(0, 12, 2)
        stops = np.arange(2, 14, 2)
        sel = get_non_overlapping_wins(starts, stops)
        self.assertTrue(np.all(sel == np.arange(6)))

        # Windows of width 2 starting at 0..5, so each reaches past the next
        # start: every other index is expected to be selected.
        starts = np.arange(6)
        stops = np.arange(2, 8)
        sel = get_non_overlapping_wins(starts, stops)
        self.assertEqual(list(sel), [0, 2, 4])