Example #1
def _prep_subsampled_bams(data, work_dir):
    """Prepare a subsampled BAM file with discordants from samblaster and minimal correct pairs.

    This attempts to minimize run times by pre-extracting useful reads mixed
    with subsampled normal pairs to estimate paired end distributions:

    https://groups.google.com/d/msg/delly-users/xmia4lwOd1Q/uaajoBkahAIJ

    Subsamples correctly aligned reads to 100 million based on speedseq defaults and
    evaluations on NA12878 whole genome data:

    https://github.com/cc2qe/speedseq/blob/ca624ba9affb0bd0fb88834ca896e9122639ec94/bin/speedseq#L1102

    XXX Currently not used as new versions of delly do not get good sensitivity
    with downsampled BAMs.
    """
    sr_bam, disc_bam = sshared.get_split_discordants(data, work_dir)
    ds_bam = bam.downsample(
        dd.get_align_bam(data),
        data,
        1e8,
        read_filter="-F 'not secondary_alignment and proper_pair'",
        always_run=True,
        work_dir=work_dir)
    out_bam = "%s-final%s" % utils.splitext_plus(ds_bam)
    if not utils.file_exists(out_bam):
        bam.merge([ds_bam, sr_bam, disc_bam], out_bam, data["config"])
    bam.index(out_bam, data["config"])
    return [out_bam]
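
For reference, the downsampling step above follows the speedseq approach of keeping roughly 100 million correctly aligned reads. A minimal standalone sketch of that idea, assuming samtools is available through pysam; the function name and the use of `samtools view -s` are illustrative and not bcbio's `bam.downsample`:

import pysam

def downsample_to(in_bam, out_bam, target_reads=1e8):
    # Total reads (mapped + unmapped) from the BAM index statistics;
    # assumes a coordinate-sorted, indexed input BAM.
    total = 0
    for line in pysam.idxstats(in_bam).strip().split("\n"):
        fields = line.split("\t")
        total += int(fields[2]) + int(fields[3])
    if total <= target_reads:
        return in_bam
    # samtools view -s keeps approximately this fraction of reads,
    # holding read pairs together.
    frac = target_reads / float(total)
    pysam.view("-b", "-s", "%.4f" % frac, "-o", out_bam, in_bam)
    pysam.index(out_bam)
    return out_bam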
Example #2
def _prep_subsampled_bams(data, work_dir):
    """Prepare a subsampled BAM file with discordants from samblaster and minimal correct pairs.

    This attempts to minimize run times by pre-extracting useful reads mixed
    with subsampled normal pairs to estimate paired end distributions:

    https://groups.google.com/d/msg/delly-users/xmia4lwOd1Q/uaajoBkahAIJ

    Subsamples correctly aligned reads to 100 million based on speedseq defaults and
    evaluations on NA12878 whole genome data:

    https://github.com/cc2qe/speedseq/blob/ca624ba9affb0bd0fb88834ca896e9122639ec94/bin/speedseq#L1102

    XXX Currently does not downsample as new versions do not get good sensitivity with
    downsampled BAMs.
    """
    full_bam, sr_bam, disc_bam = sshared.get_split_discordants(data, work_dir)
    return [full_bam]

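    # NOTE: the code below is unreachable because of the early return above;
    # it is kept for reference in case downsampling is re-enabled.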
    ds_bam = bam.downsample(full_bam, data, 1e8, read_filter="-F 'not secondary_alignment and proper_pair'",
                            always_run=True, work_dir=work_dir)
    out_bam = "%s-final%s" % utils.splitext_plus(ds_bam)
    if not utils.file_exists(out_bam):
        bam.merge([ds_bam, sr_bam, disc_bam], out_bam, data["config"])
    bam.index(out_bam, data["config"])
    return [out_bam]
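
The merge-and-index pattern shared by both variants above can be reproduced outside bcbio with the samtools wrappers in pysam. A minimal sketch, assuming mergeable (coordinate-sorted) input BAMs; file names and the helper name are hypothetical:

import os
import pysam

def merge_and_index(in_bams, out_bam):
    # samtools merge: combine the downsampled, split-read and discordant BAMs
    if not os.path.exists(out_bam):
        pysam.merge("-f", out_bam, *in_bams)
    # samtools index: needed so SV callers such as delly can random-access regions
    pysam.index(out_bam)
    return out_bam

# merge_and_index(["sample-ds.bam", "sample-sr.bam", "sample-disc.bam"], "sample-final.bam")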
Example #3
def cufflinks_assemble(*samples):
    rnaseq_resources = samples[0][0]["genome_resources"]["rnaseq"]
    config = samples[0][0]["config"]
    dirs = samples[0][0]["dirs"]
    gtf_file = rnaseq_resources.get("transcripts", None)
    ref_file = samples[0][0]["sam_ref"]
    bam_files = [data[0]['work_bam'] for data in samples]
    num_cores = config["algorithm"].get("num_cores", 1)
    out_dir = os.path.join(dirs["work"], "assembly")
    safe_makedir(out_dir)
    merged_file = os.path.join(out_dir, "merged.bam")
    merged_file = bam.merge(bam_files, merged_file, config)
    assembly_dir = cufflinks.assemble(merged_file, ref_file, gtf_file,
                                      num_cores, out_dir)
    transcripts = [os.path.join(assembly_dir, "assembly", "transcripts.gtf")]
    merged_gtf = cufflinks.merge(transcripts, ref_file, gtf_file, num_cores)
    for data in samples:
        data[0]['assembly'] = assembly_dir
    return samples
Example #4
def merge_unmapped(mapped_sam, unmapped_bam, config):
    merged_bam = os.path.join(os.path.dirname(mapped_sam), "merged.bam")
    bam_file = bam.sam_to_bam(mapped_sam, config)
    if not file_exists(merged_bam):
        merged_bam = bam.merge([bam_file, unmapped_bam], merged_bam, config)
    return merged_bam
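
The same convert-then-merge idea, sketched with pysam instead of bcbio's bam helpers; this assumes plain samtools semantics and coordinate-sortable inputs, and the names are illustrative:

import pysam

def merge_unmapped_sketch(mapped_sam, unmapped_bam, merged_bam):
    # samtools sort accepts SAM input and writes a coordinate-sorted BAM
    mapped_bam = mapped_sam.rsplit(".", 1)[0] + ".bam"
    pysam.sort("-o", mapped_bam, mapped_sam)
    # merge aligned reads with the reads the aligner left unmapped;
    # assumes both inputs can be merged by coordinate
    pysam.merge("-f", merged_bam, mapped_bam, unmapped_bam)
    pysam.index(merged_bam)
    return merged_bam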