コード例 #1
0
ファイル: bwa.py プロジェクト: matanhofree/bcbio-nextgen
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Run bwa alignment on fastq input and record the sorted BAM in ``data``.

    The output path is ``<align_dir>/<names["lane"]>-sort.bam``. When
    ``data["align_split"]`` is set, that path is kept as the final file,
    ``alignprep.setup_combine`` supplies the working output file, and the
    inputs are wrapped with ``alignprep.split_namedpipe_cl``. Otherwise the
    inputs are wrapped with ``alignprep.fastq_convert_pipe_cl`` when the
    configured ``quality_format`` is ``illumina``.

    Alignment is skipped when the working output, or the final file if there
    is one, already exists. Returns ``data`` with ``work_bam`` set to the
    output file.
    """
    if not pair_file:
        pair_file = ""
    bam_name = "{0}-sort.bam".format(names["lane"])
    out_file = os.path.join(align_dir, bam_name)
    algorithm_config = data["config"]["algorithm"]
    qual_format = algorithm_config.get("quality_format", "").lower()

    final_file = None
    if data.get("align_split"):
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file = alignprep.split_namedpipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.split_namedpipe_cl(pair_file, data)
    elif qual_format == "illumina":
        fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    rg_info = novoalign.get_rg_info(names)
    already_aligned = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if not already_aligned:
        # Fall back to the older bwa aln approach when piping is not possible
        if _can_use_mem(fastq_file, data):
            run_alignment = _align_mem
        else:
            run_alignment = _align_backtrack
        out_file = run_alignment(fastq_file, pair_file, ref_file, out_file,
                                 names, rg_info, data)
    data["work_bam"] = out_file
    return data
コード例 #2
0
ファイル: bwa.py プロジェクト: vhuarui/bcbio-nextgen
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align fastq input with bwa and store the sorted BAM path in ``data``.

    Output goes to ``<align_dir>/<names["lane"]>-sort.bam``. With
    ``data["align_split"]`` set, that path is the final file and
    ``alignprep.setup_combine`` provides the working output; inputs are
    passed through ``alignprep.split_namedpipe_cl``. Without it, inputs are
    passed through ``alignprep.fastq_convert_pipe_cl`` when ``quality_format``
    is ``illumina``.

    If no output exists yet, ``_align_backtrack`` is used when ``bwa-mem`` is
    listed in ``config -> algorithm -> tools_off`` or ``_can_use_mem`` rejects
    the input; otherwise ``_align_mem`` is used. Returns ``data`` with
    ``work_bam`` set.
    """
    pair_file = pair_file or ""
    lane_bam = "{0}-sort.bam".format(names["lane"])
    out_file = os.path.join(align_dir, lane_bam)
    qual_format = data["config"]["algorithm"].get("quality_format", "")
    qual_format = qual_format.lower()

    if data.get("align_split"):
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file = alignprep.split_namedpipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.split_namedpipe_cl(pair_file, data)
    else:
        final_file = None
        needs_conversion = qual_format == "illumina"
        if needs_conversion:
            fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
        if needs_conversion and pair_file:
            pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    rg_info = novoalign.get_rg_info(names)
    output_present = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if output_present:
        data["work_bam"] = out_file
        return data

    # Use the older bwa aln approach when bwa-mem is disabled or unusable
    tools_off = tz.get_in(["config", "algorithm", "tools_off"], data, [])
    if "bwa-mem" in tools_off or not _can_use_mem(fastq_file, data):
        out_file = _align_backtrack(fastq_file, pair_file, ref_file,
                                    out_file, names, rg_info, data)
    else:
        out_file = _align_mem(fastq_file, pair_file, ref_file,
                              out_file, names, rg_info, data)
    data["work_bam"] = out_file
    return data
コード例 #3
0
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align fastq (or ``.sdf``) input with bwa and record the sorted BAM.

    The output is named after ``names["lane"]`` if such a file already exists
    in ``align_dir``, and after ``dd.get_sample_name(data)`` otherwise. Inputs
    are prepared with ``alignprep.split_namedpipe_cls`` when
    ``data["align_split"]`` is set or the input ends in ``.sdf``; in the other
    case they go through ``alignprep.fastq_convert_pipe_cl`` if
    ``quality_format`` is ``illumina``.

    When no output exists yet, ``_align_mem`` runs if ``bwa-mem`` is in
    ``dd.get_tools_on(data)``; failing that, ``_align_backtrack`` runs if
    ``bwa-mem`` is in ``dd.get_tools_off(data)`` or ``_can_use_mem`` rejects
    the input. Returns ``data`` with ``work_bam`` set.
    """
    if not pair_file:
        pair_file = ""
    bam_template = "{0}-sort.bam"
    # Backwards compatibility: older outputs carried the lane name, newer ones
    # use the sample name.
    out_file = os.path.join(align_dir, bam_template.format(names["lane"]))
    if not utils.file_exists(out_file):
        sample_bam = bam_template.format(dd.get_sample_name(data))
        out_file = os.path.join(align_dir, sample_bam)
    qual_format = data["config"]["algorithm"].get("quality_format", "").lower()

    min_size = None
    final_file = None
    if data.get("align_split") or fastq_file.endswith(".sdf"):
        if fastq_file.endswith(".sdf"):
            min_size = rtg.min_read_size(fastq_file)
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        prepared = alignprep.split_namedpipe_cls(fastq_file, pair_file, data)
        fastq_file, pair_file = prepared
    elif qual_format == "illumina":
        fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    rg_info = novoalign.get_rg_info(names)
    have_output = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if not have_output:
        # Fall back to the older bwa aln approach when piping is not possible
        if "bwa-mem" in dd.get_tools_on(data):
            use_mem = True
        elif "bwa-mem" in dd.get_tools_off(data):
            use_mem = False
        else:
            use_mem = _can_use_mem(fastq_file, data, min_size)
        if use_mem:
            out_file = _align_mem(fastq_file, pair_file, ref_file, out_file,
                                  names, rg_info, data)
        else:
            out_file = _align_backtrack(fastq_file, pair_file, ref_file,
                                        out_file, names, rg_info, data)
    data["work_bam"] = out_file
    return data
コード例 #4
0
ファイル: bwa.py プロジェクト: stl-23/bcbio-nextgen
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align fastq (or ``.sdf``) input with bwa, optionally adding an HLA BAM.

    A lane-named output in ``align_dir`` is reused if the lane differs from
    the sample name and the file exists. Otherwise the output is
    ``<sample>-sort.bam``, with a ``-cumi`` suffix before the extension when
    ``"umi_bam"`` is a key of ``data``. Inputs go through
    ``alignprep.split_namedpipe_cls`` for split or ``.sdf`` input, or through
    ``alignprep.fastq_convert_pipe_cl`` when ``quality_format`` is
    ``illumina``.

    When no output exists yet, the aligner is one of ``_align_backtrack``,
    ``_align_mem`` or ``_align_mem_hla``, chosen from the tools on/off
    settings, ``_can_use_mem`` and the HLA predicates. The result is stored in
    ``data["work_bam"]``. If ``needs_separate_hla(data)`` holds, an extra
    ``HLA-`` prefixed BAM is produced next to it and stored in
    ``data["hla_bam"]``. Returns ``data``.
    """
    if not pair_file:
        pair_file = ""
    # Backwards compatibility: older outputs carried the lane name, newer ones
    # use the sample name.
    lane_file = None
    if names["lane"] != dd.get_sample_name(data):
        lane_bam = "{0}-sort.bam".format(names["lane"])
        lane_file = os.path.join(align_dir, lane_bam)
    if lane_file and utils.file_exists(lane_file):
        out_file = lane_file
    else:
        if "umi_bam" in data:
            umi_ext = "-cumi"
        else:
            umi_ext = ""
        sample_bam = "{0}-sort{1}.bam".format(dd.get_sample_name(data), umi_ext)
        out_file = os.path.join(align_dir, sample_bam)
    qual_format = data["config"]["algorithm"].get("quality_format", "").lower()

    min_size = None
    final_file = None
    if data.get("align_split") or fastq_file.endswith(".sdf"):
        if fastq_file.endswith(".sdf"):
            min_size = rtg.min_read_size(fastq_file)
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        prepared = alignprep.split_namedpipe_cls(fastq_file, pair_file, data)
        fastq_file, pair_file = prepared
    elif qual_format == "illumina":
        fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    rg_info = novoalign.get_rg_info(names)
    have_output = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if not have_output:
        # Fall back to the older bwa aln approach when piping is not possible
        if "bwa-mem" in dd.get_tools_on(data):
            use_mem = True
        elif "bwa-mem" in dd.get_tools_off(data):
            use_mem = False
        else:
            use_mem = _can_use_mem(fastq_file, data, min_size)

        if not use_mem:
            run_alignment = _align_backtrack
        elif (is_precollapsed_bam(data) or not hla_on(data)
              or needs_separate_hla(data)):
            run_alignment = _align_mem
        else:
            run_alignment = _align_mem_hla
        out_file = run_alignment(fastq_file, pair_file, ref_file, out_file,
                                 names, rg_info, data)
    data["work_bam"] = out_file

    # bwakit will corrupt the non-HLA alignments in a UMI collapsed BAM file
    # (see https://github.com/bcbio/bcbio-nextgen/issues/3069)
    if needs_separate_hla(data):
        out_dir = os.path.dirname(out_file)
        hla_name = "HLA-" + os.path.basename(out_file)
        hla_file = _align_mem_hla(fastq_file, pair_file, ref_file,
                                  os.path.join(out_dir, hla_name),
                                  names, rg_info, data)
        data["hla_bam"] = hla_file
    return data
コード例 #5
0
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Pipe ``bwa mem`` into ``samtools view`` and ``samtools sort``.

    Output goes to ``<align_dir>/<names["lane"]>-sort.bam``. Inputs are
    wrapped with ``alignprep.split_namedpipe_cl`` when ``data["align_split"]``
    is set, or with ``alignprep.fastq_convert_pipe_cl`` when
    ``quality_format`` is ``illumina``.

    If an output already exists nothing is run. If ``can_pipe`` rejects the
    input, the result of ``align(...)`` is returned directly. Otherwise the
    piped command runs inside a file transaction and ``data`` is returned
    with ``work_bam`` set.
    """
    if not pair_file:
        pair_file = ""
    out_file = os.path.join(align_dir, "{0}-sort.bam".format(names["lane"]))
    algorithm_config = data["config"]["algorithm"]
    qual_format = algorithm_config.get("quality_format", "").lower()

    final_file = None
    if data.get("align_split"):
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file = alignprep.split_namedpipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.split_namedpipe_cl(pair_file, data)
    elif qual_format == "illumina":
        fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    samtools = config_utils.get_program("samtools", data["config"])
    bwa = config_utils.get_program("bwa", data["config"])
    resources = config_utils.get_resources("samtools", data["config"])
    num_cores = data["config"]["algorithm"].get("num_cores", 1)
    # samtools runs alongside the aligner, so scale its memory setting down
    base_mem = resources.get("memory", "2G")
    max_mem = config_utils.adjust_memory(base_mem, 3, "decrease")
    rg_info = novoalign.get_rg_info(names)

    have_output = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if not have_output:
        # Fall back to the older bwa aln approach when piping is not possible
        if not can_pipe(fastq_file, data):
            return align(fastq_file, pair_file, ref_file, names, align_dir,
                         data)
        with utils.curdir_tmpdir():
            with file_transaction(out_file) as tx_out_file:
                cmd_template = (
                    "{bwa} mem -M -t {num_cores} -R '{rg_info}' -v 1 {ref_file} "
                    "{fastq_file} {pair_file} "
                    "| {samtools} view -b -S -u - "
                    "| {samtools} sort -@ {num_cores} -m {max_mem} - {tx_out_prefix}"
                )
                cmd_args = {
                    "bwa": bwa,
                    "num_cores": num_cores,
                    "rg_info": rg_info,
                    "ref_file": ref_file,
                    "fastq_file": fastq_file,
                    "pair_file": pair_file,
                    "samtools": samtools,
                    "max_mem": max_mem,
                    "tx_out_prefix": os.path.splitext(tx_out_file)[0],
                }
                cmd = cmd_template.format(**cmd_args)
                description = "bwa mem alignment from fastq: %s" % names["sample"]
                checks = [
                    do.file_nonempty(tx_out_file),
                    do.file_reasonable_size(tx_out_file, fastq_file),
                ]
                do.run(cmd, description, None, checks)
    data["work_bam"] = out_file
    return data
コード例 #6
0
ファイル: bwa.py プロジェクト: Galithil/bcbio-nextgen
def align_pipe(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align fastq input via a ``bwa mem | samtools view | samtools sort`` pipe.

    The output path is ``<align_dir>/<names["lane"]>-sort.bam``. With
    ``data["align_split"]`` set, ``alignprep.setup_combine`` provides the
    working output and inputs are wrapped with
    ``alignprep.split_namedpipe_cl``. Without it, inputs are wrapped with
    ``alignprep.fastq_convert_pipe_cl`` when ``quality_format`` is
    ``illumina``.

    Nothing is run if an output already exists. When ``can_pipe`` rejects the
    input, whatever ``align(...)`` returns is returned as is. Otherwise the
    piped command is executed and ``data`` is returned with ``work_bam`` set.
    """
    pair_file = pair_file or ""
    lane_bam = "{0}-sort.bam".format(names["lane"])
    out_file = os.path.join(align_dir, lane_bam)
    qual_format = data["config"]["algorithm"].get("quality_format", "")
    qual_format = qual_format.lower()

    if data.get("align_split"):
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file = alignprep.split_namedpipe_cl(fastq_file, data)
        if pair_file:
            pair_file = alignprep.split_namedpipe_cl(pair_file, data)
    else:
        final_file = None
        convert_quality = qual_format == "illumina"
        if convert_quality:
            fastq_file = alignprep.fastq_convert_pipe_cl(fastq_file, data)
        if convert_quality and pair_file:
            pair_file = alignprep.fastq_convert_pipe_cl(pair_file, data)

    config = data["config"]
    samtools = config_utils.get_program("samtools", config)
    bwa = config_utils.get_program("bwa", config)
    resources = config_utils.get_resources("samtools", config)
    num_cores = data["config"]["algorithm"].get("num_cores", 1)
    # samtools shares the machine with the aligner, so reduce its memory
    max_mem = config_utils.adjust_memory(
        resources.get("memory", "2G"), 3, "decrease")
    rg_info = novoalign.get_rg_info(names)

    output_present = utils.file_exists(out_file) or (
        final_file is not None and utils.file_exists(final_file))
    if output_present:
        data["work_bam"] = out_file
        return data

    # Fall back to the older bwa aln approach when piping is not possible
    if not can_pipe(fastq_file, data):
        return align(fastq_file, pair_file, ref_file, names, align_dir, data)

    with utils.curdir_tmpdir(), file_transaction(out_file) as tx_out_file:
        tx_out_prefix = os.path.splitext(tx_out_file)[0]
        cmd = ("{bwa} mem -M -t {num_cores} -R '{rg_info}' -v 1 {ref_file} "
               "{fastq_file} {pair_file} "
               "| {samtools} view -b -S -u - "
               "| {samtools} sort -@ {num_cores} -m {max_mem} - {tx_out_prefix}")
        cmd = cmd.format(bwa=bwa, num_cores=num_cores, rg_info=rg_info,
                         ref_file=ref_file, fastq_file=fastq_file,
                         pair_file=pair_file, samtools=samtools,
                         max_mem=max_mem, tx_out_prefix=tx_out_prefix)
        message = "bwa mem alignment from fastq: %s" % names["sample"]
        do.run(cmd, message, None,
               [do.file_nonempty(tx_out_file),
                do.file_reasonable_size(tx_out_file, fastq_file)])
    data["work_bam"] = out_file
    return data