Esempio n. 1
0
def postprocess_variants(items):
    """Finalize variant calls for a batch of samples.

    Selects the batch representative holding a ``vrn_file``, links the variant
    file and variant regions into the work directory and, when both an
    alignment BAM and a variant file are present, runs these steps in order:
    effects annotation, VCF finalization, filtering, prioritization and
    germline extraction.

    Returns the updated representative wrapped as ``[[data]]``.
    """
    data = _get_batch_representative(items, "vrn_file")
    label = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    # Keep the incoming path so it can be put back if nothing replaced the file.
    input_vcf = data.get("vrn_file")
    for key_path in (["vrn_file"],
                     ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    ready = data.get("align_bam") and data.get("vrn_file")
    if ready:
        logger.info("Calculating variation effects for %s" % label)
        effects_vcf, effects_stats = effects.add_to_vcf(data["vrn_file"], data)
        if effects_vcf:
            data["vrn_file"] = effects_vcf
        if effects_stats:
            data["vrn_stats"] = effects_stats

        batch_items = _get_orig_items(items)
        logger.info("Annotate VCF file: %s" % label)
        current_vcf = data["vrn_file"]
        caller = get_variantcaller(data)
        data["vrn_file"] = annotation.finalize_vcf(current_vcf, caller,
                                                   batch_items)

        logger.info("Filtering for %s" % label)
        current_vcf = data["vrn_file"]
        ref_file = dd.get_ref_file(data)
        variation = tz.get_in(("genome_resources", "variation"), data, {})
        data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                              data, batch_items)

        logger.info("Prioritization for %s" % label)
        data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data,
                                                       batch_items)

        logger.info("Germline extraction for %s" % label)
        data = germline.extract(data, batch_items)

    # If the current file is still the same file as the input (e.g. only the
    # work directory link was made), report the original path instead.
    if input_vcf and os.path.samefile(data["vrn_file"], input_vcf):
        data["vrn_file"] = input_vcf
    return [[data]]
Esempio n. 2
0
def postprocess_variants(items):
    """Finalize variant calls for a batch of samples.

    Selects the batch representative holding a ``vrn_file``, links the variant
    file and variant regions into the work directory and, when both an
    alignment BAM and a variant file are present, runs effects annotation,
    filtering, prioritization and germline extraction in that order.

    Returns the updated representative wrapped as ``[[data]]``.
    """
    data = _get_batch_representative(items, "vrn_file")
    label = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    # Keep the incoming path so it can be put back if nothing replaced the file.
    input_vcf = data.get("vrn_file")
    for key_path in (["vrn_file"],
                     ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    ready = data.get("align_bam") and data.get("vrn_file")
    if ready:
        logger.info("Calculating variation effects for %s" % label)
        effects_vcf, effects_stats = effects.add_to_vcf(data["vrn_file"], data)
        if effects_vcf:
            data["vrn_file"] = effects_vcf
        if effects_stats:
            data["vrn_stats"] = effects_stats

        logger.info("Filtering for %s" % label)
        batch_items = _get_orig_items(items)
        current_vcf = data["vrn_file"]
        ref_file = dd.get_ref_file(data)
        variation = tz.get_in(("genome_resources", "variation"), data, {})
        data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                              data, batch_items)

        logger.info("Prioritization for %s" % label)
        data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data,
                                                       batch_items)

        logger.info("Germline extraction for %s" % label)
        data = germline.extract(data, batch_items)

    # If the current file is still the same file as the input (e.g. only the
    # work directory link was made), report the original path instead.
    if input_vcf and os.path.samefile(data["vrn_file"], input_vcf):
        data["vrn_file"] = input_vcf
    return [[data]]
Esempio n. 3
0
def postprocess_variants(items):
    """Provide post-processing of variant calls: filtering and effects annotation.

    ``items`` is either a dict or an iterable of samples; non-dict input is
    normalized with ``utils.to_single_data``. The variant file is tracked under
    ``vrn_file`` unless the first item carries ``vrn_file_joint``, in which
    case that key is used for every step below.

    When a variant file is present the following run in order: effects
    annotation, VCF finalization, RNA editing annotation (analysis name
    containing "rna-seq"), population annotation (CWL runs), filtering,
    prioritization, germline extraction (only when prioritization produced a
    new file) and the damage filter (only when an alignment BAM is available).

    Returns the updated representative sample wrapped as ``[[data]]``.
    """
    vrn_key = "vrn_file"
    if not isinstance(items, dict):
        items = [utils.to_single_data(x) for x in items]
        if "vrn_file_joint" in items[0]:
            vrn_key = "vrn_file_joint"
    data, items = _get_batch_representative(items, vrn_key)
    items = cwlutils.unpack_tarballs(items, data)
    data = cwlutils.unpack_tarballs(data, data)
    cur_name = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % cur_name)
    # Remember the incoming path so it can be restored at the end.
    orig_vrn_file = data.get(vrn_key)
    data = _symlink_to_workdir(data, [vrn_key])
    data = _symlink_to_workdir(data,
                               ["config", "algorithm", "variant_regions"])
    if data.get(vrn_key):
        logger.info("Calculating variation effects for %s" % cur_name)
        ann_vrn_file, vrn_stats = effects.add_to_vcf(data[vrn_key], data)
        if ann_vrn_file:
            data[vrn_key] = ann_vrn_file
        if vrn_stats:
            data["vrn_stats"] = vrn_stats
        orig_items = _get_orig_items(items)
        logger.info("Annotate VCF file: %s" % cur_name)
        data[vrn_key] = annotation.finalize_vcf(data[vrn_key],
                                                get_variantcaller(data),
                                                orig_items)
        if "rna-seq" in dd.get_analysis(data).lower():
            logger.info("Annotate RNA editing sites")
            # Annotate the file tracked under vrn_key so joint-called inputs
            # keep the effects/finalize results produced above.
            # NOTE(review): previously read via dd.get_vrn_file(data), which
            # presumably returns the "vrn_file" entry -- confirm.
            ann_file = vcfanno.run_vcfanno(data[vrn_key], ["rnaedit"], data)
            if ann_file:
                data[vrn_key] = ann_file
        if cwlutils.is_cwl_run(data):
            logger.info("Annotate with population level variation data")
            # Same reasoning as above: operate on the vrn_key entry.
            ann_file = population.run_vcfanno(data[vrn_key], data,
                                              population.do_db_build([data]))
            if ann_file:
                data[vrn_key] = ann_file
        logger.info("Filtering for %s" % cur_name)
        data[vrn_key] = variant_filtration(
            data[vrn_key], dd.get_ref_file(data),
            tz.get_in(("genome_resources", "variation"), data, {}), data,
            orig_items)
        logger.info("Prioritization for %s" % cur_name)
        prio_vrn_file = prioritize.handle_vcf_calls(data[vrn_key], data,
                                                    orig_items)
        # Germline extraction only runs when prioritization returned a
        # different file than the one passed in.
        if prio_vrn_file != data[vrn_key]:
            data[vrn_key] = prio_vrn_file
            logger.info("Germline extraction for %s" % cur_name)
            data = germline.extract(data, orig_items)

        if dd.get_align_bam(data):
            data = damage.run_filter(data[vrn_key], dd.get_align_bam(data),
                                     dd.get_ref_file(data), data, orig_items)
    # If the current file is still the same file as the input, report the
    # original path rather than the work directory path.
    if orig_vrn_file and os.path.samefile(data[vrn_key], orig_vrn_file):
        data[vrn_key] = orig_vrn_file
    return [[data]]
Esempio n. 4
0
def postprocess_variants(items):
    """Finalize variant calls for a batch of samples.

    ``items`` is either a dict or an iterable of samples; non-dict input is
    normalized with ``utils.to_single_data``. The variant file is tracked under
    ``vrn_file`` unless the first item carries ``vrn_file_joint``.

    When a variant file is present the following run in order: effects
    annotation, VCF finalization, population annotation (CWL runs only),
    filtering, prioritization, germline extraction (only when prioritization
    produced a new file) and the damage filter (only when an alignment BAM is
    available).

    Returns the updated representative sample wrapped as ``[[data]]``.
    """
    vrn_key = "vrn_file"
    if not isinstance(items, dict):
        items = [utils.to_single_data(item) for item in items]
        if "vrn_file_joint" in items[0]:
            vrn_key = "vrn_file_joint"
    data, items = _get_batch_representative(items, vrn_key)
    items = cwlutils.unpack_tarballs(items, data)
    data = cwlutils.unpack_tarballs(data, data)

    sample_name = dd.get_sample_name(data)
    caller = get_variantcaller(data, require_bam=False)
    label = "%s, %s" % (sample_name, caller)
    logger.info("Finalizing variant calls: %s" % label)

    # Keep the incoming path so it can be put back if nothing replaced the file.
    input_vcf = data.get(vrn_key)
    for key_path in ([vrn_key], ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    if data.get(vrn_key):
        logger.info("Calculating variation effects for %s" % label)
        effects_vcf, effects_stats = effects.add_to_vcf(data[vrn_key], data)
        if effects_vcf:
            data[vrn_key] = effects_vcf
        if effects_stats:
            data["vrn_stats"] = effects_stats

        batch_items = _get_orig_items(items)
        logger.info("Annotate VCF file: %s" % label)
        current_vcf = data[vrn_key]
        finalize_caller = get_variantcaller(data, require_bam=False)
        data[vrn_key] = annotation.finalize_vcf(current_vcf, finalize_caller,
                                                batch_items)

        if cwlutils.is_cwl_run(data):
            logger.info("Annotate with population level variation data")
            population_vcf = population.run_vcfanno(data[vrn_key], data)
            if population_vcf:
                data[vrn_key] = population_vcf

        logger.info("Filtering for %s" % label)
        current_vcf = data[vrn_key]
        ref_file = dd.get_ref_file(data)
        variation = tz.get_in(("genome_resources", "variation"), data, {})
        data[vrn_key] = variant_filtration(current_vcf, ref_file, variation,
                                           data, batch_items)

        logger.info("Prioritization for %s" % label)
        prioritized_vcf = prioritize.handle_vcf_calls(data[vrn_key], data,
                                                      batch_items)
        # Germline extraction only runs when prioritization returned a
        # different file than the one passed in.
        if prioritized_vcf != data[vrn_key]:
            data[vrn_key] = prioritized_vcf
            logger.info("Germline extraction for %s" % label)
            data = germline.extract(data, batch_items)

        if dd.get_align_bam(data):
            data = damage.run_filter(data[vrn_key], dd.get_align_bam(data),
                                     dd.get_ref_file(data), data, batch_items)

    # If the current file is still the same file as the input, report the
    # original path rather than the work directory path.
    if input_vcf and os.path.samefile(data[vrn_key], input_vcf):
        data[vrn_key] = input_vcf
    return [[data]]
Esempio n. 5
0
def extract_germline_vcinfo(data, out_dir):
    """Return the first supported variant call of a tumor sample, with germline output.

    Only samples whose phenotype is "tumor" are considered. The first entry
    from ``_get_variants(data)`` called by vardict, octopus or freebayes is
    returned. If that entry has no ``germline`` value yet, one is produced by
    running ``germline.extract`` on a copy of ``data`` pointing at the entry's
    ``vrn_file`` and is stored on the entry (mutating it in place).

    Returns None for non-tumor samples or when no supported caller is found.
    """
    if dd.get_phenotype(data) not in ["tumor"]:
        return None
    germline_callers = {"vardict", "octopus", "freebayes"}
    for variant in _get_variants(data):
        if variant.get("variantcaller") not in germline_callers:
            continue
        if not variant.get("germline"):
            # Work on a copy so the caller's sample keeps its own vrn_file.
            sample = utils.deepish_copy(data)
            sample["vrn_file"] = variant["vrn_file"]
            extracted = germline.extract(sample, [sample], out_dir)
            variant["germline"] = extracted["vrn_file_plus"]["germline"]
        return variant
    return None
Esempio n. 6
0
def extract_germline_vcinfo(data, out_dir):
    """Find a germline-capable variant call on a tumor sample and attach its germline VCF.

    Walks ``_get_variants(data)`` for a sample with phenotype "tumor" and stops
    at the first call made by vardict, octopus or freebayes. When that call
    lacks a ``germline`` value, ``germline.extract`` is run into ``out_dir`` on
    a copy of ``data`` whose ``vrn_file`` is the call's file, and the result is
    written back onto the call dict.

    Returns the matching call dict, or None when the sample is not a tumor or
    no supported caller is present.
    """
    is_tumor = dd.get_phenotype(data) in ["tumor"]
    if not is_tumor:
        return None
    supported_callers = {"vardict", "octopus", "freebayes"}
    for call in _get_variants(data):
        if call.get("variantcaller") in supported_callers:
            if call.get("germline"):
                return call
            # Work on a copy so the caller's sample keeps its own vrn_file.
            tumor_copy = utils.deepish_copy(data)
            tumor_copy["vrn_file"] = call["vrn_file"]
            germline_data = germline.extract(tumor_copy, [tumor_copy], out_dir)
            call["germline"] = germline_data["vrn_file_plus"]["germline"]
            return call
    return None
Esempio n. 7
0
def postprocess_variants(data):
    """Finalize variant calls for a single sample.

    When the sample has both an alignment BAM and a variant file, runs effects
    annotation, filtering, prioritization and germline extraction in that
    order, updating ``vrn_file`` (and ``vrn_stats`` when produced) on the way.

    Returns the sample wrapped as ``[[data]]``.
    """
    label = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)

    ready = data.get("align_bam") and data.get("vrn_file")
    if ready:
        logger.info("Calculating variation effects for %s" % label)
        effects_vcf, effects_stats = effects.add_to_vcf(data["vrn_file"], data)
        if effects_vcf:
            data["vrn_file"] = effects_vcf
        if effects_stats:
            data["vrn_stats"] = effects_stats

        logger.info("Filtering for %s" % label)
        current_vcf = data["vrn_file"]
        ref_file = data["sam_ref"]
        variation = tz.get_in(("genome_resources", "variation"), data, {})
        data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                              data)

        logger.info("Prioritization for %s" % label)
        data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)

        logger.info("Germline extraction for %s" % label)
        data = germline.extract(data)
    return [[data]]