Exemple #1
0
def parallel_variantcall_region(samples, run_parallel):
    """Perform variant calling and post-analysis on samples by region.

    Every item in ``samples`` is expanded through
    ``genotype.handle_multiple_variantcallers``; the expanded items are
    dispatched with ``grouped_parallel_split_combine``. Items that expand to
    nothing are either collected for ``group_combine_parts`` (when they carry
    a "combine" entry whose first key is also present on the item) or passed
    through untouched.

    Args:
        samples: iterable of sample items; each item is indexed with ``[0]``
            to reach its data mapping.
        run_parallel: callable handed on to ``grouped_parallel_split_combine``.

    Returns:
        The passed-through and re-grouped items followed by the results of
        ``grouped_parallel_split_combine``.
    """
    to_process = []
    extras = []
    to_group = []
    for x in samples:
        expanded = list(genotype.handle_multiple_variantcallers(x))
        if expanded:
            to_process.extend(expanded)
            continue
        data = x[0]
        # list(...)[0] instead of .keys()[0]: dict views cannot be subscripted
        # on Python 3, while this form behaves the same on Python 2 and 3.
        if "combine" in data and list(data["combine"])[0] in data:
            assert len(x) == 1
            to_group.append(data)
        else:
            extras.append(x)
    split_fn = _split_by_ready_regions("-variants.vcf.gz", "work_bam",
                                       genotype.get_variantcaller)
    if to_group:
        extras += group_combine_parts(to_group)
    return extras + grouped_parallel_split_combine(
        to_process, split_fn, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "concat_variant_files", "vrn_file", ["region", "sam_ref", "config"])
Exemple #2
0
def parallel_variantcall_region(samples, run_parallel):
    """Perform variant calling and post-analysis on samples by region.

    Every item in ``samples`` is expanded through
    ``genotype.handle_multiple_variantcallers``; the expanded items are
    dispatched with ``grouped_parallel_split_combine``. Items that expand to
    nothing are either collected for ``group_combine_parts`` (when they carry
    a "combine" entry whose first key is also present on the item) or passed
    through untouched.

    Args:
        samples: iterable of sample items; each item is indexed with ``[0]``
            to reach its data mapping.
        run_parallel: callable handed on to ``grouped_parallel_split_combine``.

    Returns:
        The passed-through and re-grouped items followed by the results of
        ``grouped_parallel_split_combine``.
    """
    to_process = []
    extras = []
    to_group = []
    for x in samples:
        expanded = list(genotype.handle_multiple_variantcallers(x))
        if expanded:
            to_process.extend(expanded)
            continue
        data = x[0]
        # list(...)[0] instead of .keys()[0]: dict views cannot be subscripted
        # on Python 3, while this form behaves the same on Python 2 and 3.
        if "combine" in data and list(data["combine"])[0] in data:
            assert len(x) == 1
            to_group.append(data)
        else:
            extras.append(x)
    split_fn = _split_by_ready_regions("-variants.vcf", "work_bam",
                                       genotype.get_variantcaller)
    if to_group:
        extras += group_combine_parts(to_group)
    return extras + grouped_parallel_split_combine(
        to_process, split_fn, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "concat_variant_files", "vrn_file", ["region", "sam_ref", "config"])
Exemple #3
0
def parallel_variantcall_region(samples, run_parallel):
    """Run region-based variant calling and post-analysis over samples.

    Items that ``handle_multiple_variantcallers`` expands into at least one
    entry are processed via ``grouped_parallel_split_combine`` and then
    collapsed with ``_collapse_by_bam_variantcaller``; all other items are
    returned unchanged ahead of the processed results.
    """
    queued, passthrough = [], []
    for sample in samples:
        expanded = list(handle_multiple_variantcallers(sample))
        if expanded:
            queued.extend(expanded)
        else:
            passthrough.append(sample)
    split_fn = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    combined = grouped_parallel_split_combine(
        queued, split_fn, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files",
        "vrn_file", ["region", "sam_ref", "config"])
    return passthrough + _collapse_by_bam_variantcaller(combined)
Exemple #4
0
def parallel_variantcall_region(samples, run_parallel):
    """Run region-based variant calling and post-analysis over samples.

    Builds a split function from ``_split_by_ready_regions`` and hands all
    samples to ``grouped_parallel_split_combine``, returning its result.
    """
    region_splitter = _split_by_ready_regions(
        "-variants.vcf", "work_bam", genotype.get_variantcaller)
    # Keys forwarded as the final positional argument of the combine step.
    extra_keys = ["sam_ref", "config"]
    return grouped_parallel_split_combine(
        samples, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", extra_keys)
Exemple #5
0
def parallel_variantcall_region(samples, run_parallel):
    """Run region-based variant calling and post-analysis over samples.

    ``_dup_samples_by_variantcaller`` separates the items to process from the
    ones passed through; processed items go through
    ``grouped_parallel_split_combine`` and ``_collapse_by_bam_variantcaller``
    and are appended after the passed-through ones.
    """
    queued, passthrough = _dup_samples_by_variantcaller(samples)
    region_splitter = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    combined = grouped_parallel_split_combine(
        queued, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files",
        "vrn_file", ["region", "sam_ref", "config"])
    called = _collapse_by_bam_variantcaller(combined)
    return passthrough + called
Exemple #6
0
def parallel_variantcall_region(samples, run_parallel):
    """Call variants by region and run post-analysis on the given samples.

    The input is divided by ``_dup_samples_by_variantcaller`` into work items
    and leftovers. Work items are run through
    ``grouped_parallel_split_combine``, collapsed by
    ``_collapse_by_bam_variantcaller`` and returned after the leftovers.
    """
    work_items, leftovers = _dup_samples_by_variantcaller(samples)
    splitter = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    shared_keys = ["region", "sam_ref", "config"]
    merged = grouped_parallel_split_combine(
        work_items, splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files", "vrn_file", shared_keys)
    return leftovers + _collapse_by_bam_variantcaller(merged)
Exemple #7
0
def parallel_variantcall_region(samples, run_parallel):
    """Run region-based variant calling and post-analysis over samples.

    Each sample is expanded with ``genotype.handle_multiple_variantcallers``
    and the flattened result is passed to ``grouped_parallel_split_combine``.
    """
    # Flatten the per-sample expansions into a single work list.
    queued = [item
              for sample in samples
              for item in genotype.handle_multiple_variantcallers(sample)]
    region_splitter = _split_by_ready_regions(
        "-variants.vcf", "work_bam", genotype.get_variantcaller)
    return grouped_parallel_split_combine(
        queued, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", ["sam_ref", "config"])
Exemple #8
0
def square_off(samples, run_parallel):
    """Run joint calling across all variants of a batch.

    Only the first element of each entry in ``samples`` is used. Entries with
    both a configured jointcaller and a batch are processed via
    ``grouped_parallel_split_combine``; the remaining entries are appended
    unchanged after the processed results.
    """
    queued, passthrough = [], []
    for entry in samples:
        data = entry[0]
        jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        batch = tz.get_in(("metadata", "batch"), data)
        destination = queued if (jointcaller and batch) else passthrough
        destination.append([data])
    processed = grouped_parallel_split_combine(
        queued, _split_by_callable_region, multi.group_batches_joint,
        run_parallel, "square_batch_region", "concat_variant_files",
        "vrn_file", ["region", "sam_ref", "config"])
    return _combine_to_jointcaller(processed) + passthrough
Exemple #9
0
def square_off(samples, run_parallel):
    """Joint-call every variant within a batch.

    The first element of each ``samples`` entry is inspected: when it has both
    a jointcaller setting and a batch it is queued for
    ``grouped_parallel_split_combine``, otherwise it is carried over as-is and
    returned after the jointly called results.
    """
    joint_inputs = []
    untouched = []
    for data in (entry[0] for entry in samples):
        has_caller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        has_batch = tz.get_in(("metadata", "batch"), data)
        if not (has_caller and has_batch):
            untouched.append([data])
            continue
        joint_inputs.append([data])
    shared_keys = ["region", "sam_ref", "config"]
    processed = grouped_parallel_split_combine(
        joint_inputs, _split_by_callable_region, multi.group_batches_joint,
        run_parallel, "square_batch_region", "concat_variant_files",
        "vrn_file", shared_keys)
    return _combine_to_jointcaller(processed) + untouched
Exemple #10
0
def parallel_variantcall_region(samples, run_parallel):
    """Run region-based variant calling and post-analysis over samples.

    Items that ``genotype.handle_multiple_variantcallers`` expands into at
    least one entry are processed via ``grouped_parallel_split_combine``;
    items that expand to nothing are returned unchanged ahead of the
    processed results.
    """
    queued, passthrough = [], []
    for sample in samples:
        expanded = list(genotype.handle_multiple_variantcallers(sample))
        if not expanded:
            passthrough.append(sample)
        else:
            queued.extend(expanded)
    region_splitter = _split_by_ready_regions(
        "-variants.vcf", "work_bam", genotype.get_variantcaller)
    called = grouped_parallel_split_combine(
        queued, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "concat_variant_files", "vrn_file", ["region", "sam_ref", "config"])
    return passthrough + called
Exemple #11
0
def square_off(samples, run_parallel):
    """Run joint calling across all variants of a batch.

    Only the first element of each entry in ``samples`` is used. For entries
    with a batch, every item from
    ``genotype.handle_multiple_callers(data, "jointcaller")`` that passes
    ``_is_jointcaller_compatible`` is queued for processing. Entries that
    queue nothing are appended unchanged after the processed results.
    """
    queued, passthrough = [], []
    for entry in samples:
        data = entry[0]
        compatible = []
        if tz.get_in(("metadata", "batch"), data):
            compatible = [
                [candidate]
                for candidate in genotype.handle_multiple_callers(data, "jointcaller")
                if _is_jointcaller_compatible(candidate)
            ]
        if compatible:
            queued.extend(compatible)
        else:
            passthrough.append([data])
    processed = grouped_parallel_split_combine(
        queued, _split_by_callable_region, multi.group_batches_joint,
        run_parallel, "square_batch_region", "concat_variant_files",
        "vrn_file", ["region", "sam_ref", "config"])
    return _combine_to_jointcaller(processed) + passthrough
Exemple #12
0
def parallel_variantcall(sample_info, parallel_fn):
    """Genotype samples, running in parallel over individual chromosomes.

    Entries whose first element has a variant caller (per
    ``get_variantcaller``) are expanded with
    ``handle_multiple_variantcallers`` and processed through
    ``grouped_parallel_split_combine``; all other entries are returned as
    given, ahead of the processed results.
    """
    pending, done = [], []
    for entry in sample_info:
        if not get_variantcaller(entry[0]):
            done.append(entry)
            continue
        pending.extend(handle_multiple_variantcallers(entry))
    if not pending:
        # Nothing needs calling, so the split/combine machinery is skipped.
        return done
    split_fn = process_bam_by_chromosome(
        "-variants.vcf", "work_bam", dir_ext_fn=get_variantcaller)
    done.extend(grouped_parallel_split_combine(
        pending, split_fn, multi.group_batches, parallel_fn,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", ["sam_ref", "config"]))
    return done
Exemple #13
0
def square_off(samples, run_parallel):
    """Joint-call every variant within a batch.

    The first element of each ``samples`` entry is examined. If it has a
    batch, the items from
    ``genotype.handle_multiple_callers(data, "jointcaller")`` accepted by
    ``_is_jointcaller_compatible`` are queued, each wrapped in its own list.
    Entries that contribute nothing to the queue are carried over and
    returned after the jointly called results.
    """
    joint_inputs = []
    untouched = []
    for data in (entry[0] for entry in samples):
        n_before = len(joint_inputs)
        if tz.get_in(("metadata", "batch"), data):
            for candidate in genotype.handle_multiple_callers(data, "jointcaller"):
                if _is_jointcaller_compatible(candidate):
                    joint_inputs.append([candidate])
        # No growth means nothing compatible was queued for this entry.
        if len(joint_inputs) == n_before:
            untouched.append([data])
    shared_keys = ["region", "sam_ref", "config"]
    processed = grouped_parallel_split_combine(
        joint_inputs, _split_by_callable_region, multi.group_batches_joint,
        run_parallel, "square_batch_region", "concat_variant_files",
        "vrn_file", shared_keys)
    return _combine_to_jointcaller(processed) + untouched
Exemple #14
0
def parallel_variantcall(sample_info, parallel_fn):
    """Provide sample genotyping, parallelized over individual chromosomes.

    Each entry is routed by ``get_variantcaller`` applied to its first
    element: a truthy result sends the entry through
    ``handle_multiple_variantcallers`` into the work list, a falsy result
    keeps it as already finished. The work list, when non-empty, is run
    through ``grouped_parallel_split_combine`` and its output is appended to
    the finished entries.
    """
    work_items = []
    completed = []
    for entry in sample_info:
        if get_variantcaller(entry[0]):
            work_items += handle_multiple_variantcallers(entry)
        else:
            completed += [entry]
    if work_items:
        chrom_splitter = process_bam_by_chromosome(
            "-variants.vcf", "work_bam", dir_ext_fn=get_variantcaller)
        shared_keys = ["sam_ref", "config"]
        completed.extend(grouped_parallel_split_combine(
            work_items, chrom_splitter, multi.group_batches, parallel_fn,
            "variantcall_sample", "split_variants_by_sample",
            "combine_variant_files", "vrn_file", shared_keys))
    return completed
Exemple #15
0
def parallel_variantcall_region(samples, run_parallel):
    """Run region-based variant calling and post-analysis over samples.

    Only the first element of each entry in ``samples`` is used. Items from
    ``handle_multiple_callers(data, "variantcaller", "gatk")`` are each
    wrapped in a list and processed via ``grouped_parallel_split_combine``,
    then collapsed with ``_collapse_by_bam_variantcaller``. Entries producing
    no items are returned unchanged ahead of the processed results.
    """
    queued, passthrough = [], []
    for entry in samples:
        data = entry[0]
        expanded = [[item] for item in
                    handle_multiple_callers(data, "variantcaller", "gatk")]
        if expanded:
            queued.extend(expanded)
        else:
            passthrough.append([data])
    region_splitter = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    combined = grouped_parallel_split_combine(
        queued, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files",
        "vrn_file", ["region", "sam_ref", "config"])
    return passthrough + _collapse_by_bam_variantcaller(combined)