def benchmark_recalibrators(job, sample, ref, dbsnp, mills):
    """Benchmark base-quality-score recalibrators (ADAM vs. GATK3).

    Stages all inputs, prepares the reference and sorted/indexed reads,
    then runs both BQSR implementations on the same sample.

    :param job: Toil job context threaded through every child call.
    :param sample: URL of the input reads file.
    :param ref: URL of the reference FASTA.
    :param dbsnp: URL of the dbSNP known-sites VCF.
    :param mills: URL of the Mills indel known-sites VCF.
    """
    # Stage the reference and derive its companion files (faidx + dict).
    _log.info("Downloading ref")
    ref_id = download_url_job(job, ref)
    _log.info("Indexing reference.")
    faidx = run_samtools_faidx(job, ref_id)
    _log.info("Extracting reference sequence dictionary")
    ref_dict = run_picard_create_sequence_dictionary(job, ref_id)

    # Stage the known-sites VCFs.
    _log.info("Downloading dbSNP VCF")
    dbsnp_id = download_url_job(job, dbsnp)
    _log.info("Downloading Mills VCF")
    mills_id = download_url_job(job, mills)

    # Stage the reads, then coordinate-sort and index them.
    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)
    _log.info("Sorting reads by coordinate.")
    coordinate_sorted_bam = run_sambamba_sort(job, reads_id)
    _log.info("Indexing sorted BAM.")
    bam_index = run_samtools_index(job, coordinate_sorted_bam)

    # Run both recalibrators against the same inputs.
    run_adam_bqsr(job, reads_id, dbsnp_id)
    # NOTE(review): bam_index was built from coordinate_sorted_bam, yet the
    # unsorted reads_id is passed alongside it here — confirm this pairing is
    # what run_gatk3_bqsr expects (compare gatk3_transform, which passes the
    # sorted BAM with its own index).
    run_gatk3_bqsr(job, reads_id, bam_index, ref_id, faidx, ref_dict,
                   dbsnp_id, mills_id)
def benchmark_realigners(job, sample, ref, g1k, mills):
    """Benchmark indel realigners (ADAM vs. GATK3).

    Stages all inputs, prepares the reference and sorted/indexed reads,
    then runs both realignment implementations on the same sample.

    :param job: Toil job context threaded through every child call.
    :param sample: URL of the input reads file.
    :param ref: URL of the reference FASTA.
    :param g1k: URL of the 1000 Genomes indel VCF.
    :param mills: URL of the Mills indel VCF.
    """
    # Stage the reference and derive its companion files (faidx + dict).
    _log.info("Downloading ref")
    ref_id = download_url_job(job, ref)
    _log.info("Indexing reference.")
    faidx = run_samtools_faidx(job, ref_id)
    _log.info("Extracting reference sequence dictionary")
    ref_dict = run_picard_create_sequence_dictionary(job, ref_id)

    # Stage the known-indels VCFs.
    _log.info("Downloading 1000G VCF")
    g1k_id = download_url_job(job, g1k)
    _log.info("Downloading Mills VCF")
    mills_id = download_url_job(job, mills)

    # Stage the reads, then coordinate-sort and index them.
    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)
    _log.info("Sorting reads by coordinate.")
    coordinate_sorted_bam = run_sambamba_sort(job, reads_id)
    _log.info("Indexing sorted BAM.")
    bam_index = run_samtools_index(job, coordinate_sorted_bam)

    # Run both realigners against the same inputs.
    run_adam_ri(job, reads_id)
    # NOTE(review): bam_index was built from coordinate_sorted_bam, yet the
    # unsorted reads_id is passed alongside it here — confirm this pairing is
    # what run_gatk3_ir expects.
    run_gatk3_ir(job, reads_id, bam_index, ref_id, faidx, ref_dict,
                 g1k_id, mills_id)
def benchmark_duplicate_markers(job, sample):
    """Benchmark duplicate-marking tools on one sample.

    Runs Picard MarkDuplicates, samtools rmdup, sambamba markdup, and ADAM
    on a coordinate-sorted copy of the reads, plus SAMBLASTER on a
    queryname-sorted SAM (SAMBLASTER requires queryname order).

    :param job: Toil job context threaded through every child call.
    :param sample: URL of the input reads file.
    """
    # Stage the reads, then coordinate-sort and index them — the
    # coordinate-sorted BAM feeds picard/samtools/sambamba below.
    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)
    _log.info("Sorting reads by coordinate.")
    coordinate_sorted_bam = run_sambamba_sort(job, reads_id)
    _log.info("Indexing sorted BAM.")
    bam_index = run_samtools_index(job, coordinate_sorted_bam)

    # Coordinate-sorted duplicate markers.
    _log.info("Marking duplicates with picard.")
    picard_bam = picard_mark_duplicates(job, coordinate_sorted_bam, bam_index)
    _log.info("Marking duplicates with samtools.")
    samtools_bam = run_samtools_rmdup(job, coordinate_sorted_bam)
    _log.info("Marking duplicates with sambamba.")
    sambamba_bam = run_sambamba_markdup(job, coordinate_sorted_bam)
    run_adam_markdups(job, reads_id)

    # SAMBLASTER path: queryname-sort, convert to SAM, then mark duplicates.
    _log.info("Sorting reads by name.")
    queryname_sorted_bam = run_sambamba_sort(job, reads_id, sort_by_name=True)
    # BUG FIX: the original log message had the conversion direction reversed
    # ("sam to bam") — run_samtools_view converts the BAM into a SAM here.
    _log.info("Dumping queryname sorted bam to sam.")
    queryname_sorted_sam = run_samtools_view(job, queryname_sorted_bam)
    _log.info("Marking duplicates with SAMBLASTER.")
    samblaster_sam = run_samblaster(job, queryname_sorted_sam)
def gatk3_transform(job, ref, in_file, snp_file, g1k_indels, mills_indels):
    """Run the full GATK3 preprocessing pipeline (realign + recalibrate).

    Stages the reference (with faidx index and sequence dictionary), the
    reads (sorted and indexed), and the known-sites VCFs, then hands
    everything to run_gatk_preprocessing with realignment enabled.

    :param job: Toil job context threaded through every child call.
    :param ref: URL of the reference FASTA.
    :param in_file: URL of the input reads file.
    :param snp_file: URL of the dbSNP VCF.
    :param g1k_indels: URL of the 1000 Genomes indel VCF.
    :param mills_indels: URL of the Mills indel VCF.
    :return: result of run_gatk_preprocessing on the staged inputs.
    """
    # Stage the reference and derive its companion files (faidx + dict).
    _log.info("Downloading ref")
    ref_id = download_url_job(job, ref)
    _log.info("Indexing reference.")
    faidx = run_samtools_faidx(job, ref_id)
    _log.info("Extracting reference sequence dictionary")
    ref_dict = run_picard_create_sequence_dictionary(job, ref_id)

    # Stage the reads, then sort and index them.
    _log.info("Downloading reads")
    reads_id = download_url_job(job, in_file)
    _log.info("Sorting reads.")
    sorted_bam = run_samtools_sort(job, reads_id)
    _log.info("Indexing reads.")
    bai = run_samtools_index(job, sorted_bam)

    # Stage the known-sites VCF resources.
    _log.info("Downloading resources")
    g1k_id = download_url_job(job, g1k_indels)
    mills_id = download_url_job(job, mills_indels)
    snp_id = download_url_job(job, snp_file)

    _log.info("Running GATK preprocessing")
    return run_gatk_preprocessing(job, sorted_bam, bai,
                                  ref_id, ref_dict, faidx,
                                  g1k_id, mills_id, snp_id,
                                  realign=True)
def benchmark_sorters(job, sample):
    """Benchmark BAM sorters (Picard, samtools, sambamba, ADAM) on one sample.

    :param job: Toil job context threaded through every child call.
    :param sample: URL of the input reads file.
    """
    # Stage the reads once; every sorter consumes the same file ID.
    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)

    _log.info("Sorting reads with picard.")
    picard_sorted_bam = run_picard_sort(job, reads_id)
    _log.info("Sorting reads with samtools.")
    samtools_sorted_bam = run_samtools_sort(job, reads_id)
    _log.info("Sorting reads with sambamba.")
    sambamba_sorted_bam = run_sambamba_sort(job, reads_id)
    run_adam_sort(job, reads_id)