Beispiel #1
0
def benchmark_recalibrators(job, sample, ref, dbsnp, mills):
    """Run the ADAM and GATK3 base quality score recalibrators on one sample.

    Fetches the reference, the dbSNP and Mills VCFs and the reads, builds the
    reference index and sequence dictionary, coordinate-sorts and indexes the
    reads, and then calls ``run_adam_bqsr`` followed by ``run_gatk3_bqsr``.

    :param job: job object forwarded to every helper call.
    :param sample: reads location, handed to ``download_url_job``.
    :param ref: reference location, handed to ``download_url_job``.
    :param dbsnp: dbSNP VCF location, handed to ``download_url_job``.
    :param mills: Mills VCF location, handed to ``download_url_job``.
    :return: None; the recalibrator results are not propagated.
    """

    _log.info("Downloading ref")
    ref_id = download_url_job(job, ref)

    _log.info("Indexing reference.")
    faidx = run_samtools_faidx(job, ref_id)

    _log.info("Extracting reference sequence dictionary")
    ref_dict = run_picard_create_sequence_dictionary(job, ref_id)

    _log.info("Downloading dbSNP VCF")
    dbsnp_id = download_url_job(job, dbsnp)

    _log.info("Downloading Mills VCF")
    mills_id = download_url_job(job, mills)

    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)

    _log.info("Sorting reads by coordinate.")
    coordinate_sorted_bam = run_sambamba_sort(job, reads_id)

    _log.info("Indexing sorted BAM.")
    bam_index = run_samtools_index(job, coordinate_sorted_bam)

    # ADAM is given the reads exactly as downloaded.
    run_adam_bqsr(job, reads_id, dbsnp_id)

    # bam_index was built from coordinate_sorted_bam, so GATK must receive
    # that same BAM; pairing the index with the unsorted download would hand
    # GATK a BAM and an index that do not describe the same file.
    run_gatk3_bqsr(job, coordinate_sorted_bam, bam_index, ref_id, faidx,
                   ref_dict, dbsnp_id, mills_id)
Beispiel #2
0
def benchmark_realigners(job, sample, ref, g1k, mills):
    """Run the ADAM and GATK3 indel realigners on one sample.

    Fetches the reference, the 1000G and Mills VCFs and the reads, builds the
    reference index and sequence dictionary, coordinate-sorts and indexes the
    reads, and then calls ``run_adam_ri`` followed by ``run_gatk3_ir``.

    :param job: job object forwarded to every helper call.
    :param sample: reads location, handed to ``download_url_job``.
    :param ref: reference location, handed to ``download_url_job``.
    :param g1k: 1000G VCF location, handed to ``download_url_job``.
    :param mills: Mills VCF location, handed to ``download_url_job``.
    :return: None; the realigner results are not propagated.
    """

    _log.info("Downloading ref")
    ref_id = download_url_job(job, ref)

    _log.info("Indexing reference.")
    faidx = run_samtools_faidx(job, ref_id)

    _log.info("Extracting reference sequence dictionary")
    ref_dict = run_picard_create_sequence_dictionary(job, ref_id)

    _log.info("Downloading 1000G VCF")
    g1k_id = download_url_job(job, g1k)

    _log.info("Downloading Mills VCF")
    mills_id = download_url_job(job, mills)

    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)

    _log.info("Sorting reads by coordinate.")
    coordinate_sorted_bam = run_sambamba_sort(job, reads_id)

    _log.info("Indexing sorted BAM.")
    bam_index = run_samtools_index(job, coordinate_sorted_bam)

    # ADAM is given the reads exactly as downloaded.
    run_adam_ri(job, reads_id)

    # bam_index was built from coordinate_sorted_bam, so GATK must receive
    # that same BAM; pairing the index with the unsorted download would hand
    # GATK a BAM and an index that do not describe the same file.
    run_gatk3_ir(job, coordinate_sorted_bam, bam_index, ref_id, faidx,
                 ref_dict, g1k_id, mills_id)
Beispiel #3
0
def benchmark_duplicate_markers(job, sample):
    """Run several duplicate-marking tools over the same set of reads.

    The reads are downloaded once. A coordinate-sorted, indexed copy is fed
    to the picard, samtools and sambamba steps; ``run_adam_markdups`` gets the
    reads as downloaded; a queryname-sorted copy is converted with
    ``run_samtools_view`` and passed to ``run_samblaster``.

    :param job: job object forwarded to every helper call.
    :param sample: reads location, handed to ``download_url_job``.
    :return: None; the per-tool outputs are not kept, so they are not bound
        to local names.
    """

    _log.info("Downloading reads")
    reads_id = download_url_job(job, sample)

    _log.info("Sorting reads by coordinate.")
    coordinate_sorted_bam = run_sambamba_sort(job, reads_id)

    _log.info("Indexing sorted BAM.")
    bam_index = run_samtools_index(job, coordinate_sorted_bam)

    _log.info("Marking duplicates with picard.")
    picard_mark_duplicates(job, coordinate_sorted_bam, bam_index)

    _log.info("Marking duplicates with samtools.")
    run_samtools_rmdup(job, coordinate_sorted_bam)

    _log.info("Marking duplicates with sambamba.")
    run_sambamba_markdup(job, coordinate_sorted_bam)

    run_adam_markdups(job, reads_id)

    _log.info("Sorting reads by name.")
    queryname_sorted_bam = run_sambamba_sort(job, reads_id, sort_by_name=True)

    # The input here is the queryname-sorted BAM and the result is consumed
    # as SAM below, so the message reads "bam to sam" (it was reversed).
    _log.info("Dumping queryname sorted bam to sam.")
    queryname_sorted_sam = run_samtools_view(job, queryname_sorted_bam)

    _log.info("Marking duplicates with SAMBLASTER.")
    run_samblaster(job, queryname_sorted_sam)
Beispiel #4
0
def gatk3_transform(job, ref, in_file, snp_file, g1k_indels, mills_indels):
    """Prepare a reference and reads, then run GATK preprocessing on them.

    The reference is downloaded, indexed and given a sequence dictionary; the
    reads are downloaded, sorted and indexed; the three resource files are
    downloaded; everything is then forwarded to ``run_gatk_preprocessing``
    with ``realign=True``.

    :param job: job object forwarded to every helper call.
    :param ref: reference location, handed to ``download_url_job``.
    :param in_file: reads location, handed to ``download_url_job``.
    :param snp_file: SNP resource location.
    :param g1k_indels: 1000G indel resource location.
    :param mills_indels: Mills indel resource location.
    :return: whatever ``run_gatk_preprocessing`` returns.
    """
    _log.info("Downloading ref")
    reference = download_url_job(job, ref)

    _log.info("Indexing reference.")
    reference_index = run_samtools_faidx(job, reference)

    _log.info("Extracting reference sequence dictionary")
    sequence_dict = run_picard_create_sequence_dictionary(job, reference)

    _log.info("Downloading reads")
    raw_reads = download_url_job(job, in_file)

    _log.info("Sorting reads.")
    sorted_reads = run_samtools_sort(job, raw_reads)

    _log.info("Indexing reads.")
    reads_index = run_samtools_index(job, sorted_reads)

    _log.info("Downloading resources")
    # Download order is kept: 1000G indels, Mills indels, then SNPs.
    g1k_vcf, mills_vcf, snp_vcf = [
        download_url_job(job, resource)
        for resource in (g1k_indels, mills_indels, snp_file)
    ]

    _log.info("Running GATK preprocessing")
    # Positional order expected by the call: reads, reads index, reference,
    # sequence dictionary, reference index, then the three resources.
    preprocessing_inputs = (
        sorted_reads,
        reads_index,
        reference,
        sequence_dict,
        reference_index,
        g1k_vcf,
        mills_vcf,
        snp_vcf,
    )
    return run_gatk_preprocessing(job, *preprocessing_inputs, realign=True)
Beispiel #5
0
def benchmark_sorters(job, sample):
    """Run the picard, samtools, sambamba and ADAM sorters on the same reads.

    The reads are downloaded once and the same downloaded handle is passed to
    each sorter in turn. The sorted outputs are not kept.

    :param job: job object forwarded to every helper call.
    :param sample: reads location, handed to ``download_url_job``.
    :return: None.
    """
    _log.info("Downloading reads")
    unsorted_reads = download_url_job(job, sample)

    # Each entry pairs the log line with the sorter it announces; the tuple
    # order is the execution order.
    logged_sorters = (
        ("Sorting reads with picard.", run_picard_sort),
        ("Sorting reads with samtools.", run_samtools_sort),
        ("Sorting reads with sambamba.", run_sambamba_sort),
    )
    for message, sort_reads in logged_sorters:
        _log.info(message)
        sort_reads(job, unsorted_reads)

    # The ADAM sorter runs last and has no log line of its own.
    run_adam_sort(job, unsorted_reads)