Example #1
0
def main(snpeff_jar, vcf_ref, genome, interval_file=None):
    """Run snpeff_effects on one VCF file or on each matching VCF in a directory.

    When vcf_ref is a directory, every file inside it matching
    "*-snp-filter.vcf" is processed in sorted order; otherwise vcf_ref
    itself is treated as the single input. snpeff_jar, genome and
    interval_file are passed through to snpeff_effects unchanged.
    """
    if not os.path.isdir(vcf_ref):
        targets = [vcf_ref]
    else:
        pattern = os.path.join(vcf_ref, "*-snp-filter.vcf")
        targets = sorted(glob.glob(pattern))
    for target in targets:
        snpeff_effects(snpeff_jar, target, genome, interval_file)
Example #2
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Runs snpeff_effects first; annotate_effects is only invoked when that
    step produced a truthy VCF result. Returns a two item tuple: the
    annotated VCF (None when there was nothing to annotate) and the second
    value returned by snpeff_effects.
    """
    effects_vcf, effects_txt = snpeff_effects(vrn_file, genome_build, config)
    if not effects_vcf:
        return None, effects_txt
    annotated = annotate_effects(vrn_file, effects_vcf, genome_file, config)
    return annotated, effects_txt
Example #3
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Delegates to snpEff and hands back whatever it returns. The GATK
    annotator is no longer run, since it requires an old version of snpEff.
    genome_file is accepted but not used here.
    """
    effects_result = snpeff_effects(vrn_file, genome_build, config)
    return effects_result
Example #4
0
def postprocess_variants(data):
    """Post-process variant calls: effects annotation, filtering, prioritization.

    Only logs and returns when data lacks a truthy "align_bam" or
    "vrn_file". Otherwise the effects tool selected by effects.get_type
    ("snpeff" or "vep") is run and its truthy result replaces
    data["vrn_file"]; the file is then passed through variant_filtration
    and prioritize.handle_vcf_calls.

    Returns data wrapped as [[data]].
    Raises ValueError for an effects type other than "snpeff" or "vep".
    """
    sample_label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % sample_label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % sample_label)
    effect_todo = effects.get_type(data)
    if effect_todo:
        if effect_todo == "snpeff":
            annotated = effects.snpeff_effects(data)
        elif effect_todo == "vep":
            annotated = effects.run_vep(data)
        else:
            message = "Unexpected variant effects configuration: %s" % effect_todo
            raise ValueError(message)
        # Keep the existing file when the effects step returns nothing.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % sample_label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation_resources = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(current_vcf, ref_file,
                                          variation_resources, data)

    logger.info("Prioritization for %s" % sample_label)
    data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)
    return [[data]]
Example #5
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    The snpeff_effects step yields a VCF and a second output; the VCF is
    passed on to annotate_effects only when it is truthy. Returns the pair
    (annotated VCF or None, second snpeff_effects output).
    """
    vcf_out, txt_out = snpeff_effects(vrn_file, genome_build, config)
    if vcf_out:
        annotated = annotate_effects(vrn_file, vcf_out, genome_file, config)
    else:
        annotated = None
    return annotated, txt_out
Example #6
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Thin wrapper that runs snpEff and returns the resulting effects output.
    The GATK annotator is no longer run, since it requires an old version
    of snpEff. The genome_file argument is not used by this function.
    """
    snpeff_output = snpeff_effects(vrn_file, genome_build, config)
    return snpeff_output
def main(config_file, env, cores):
    """Prepare Illumina samples, merge their VCFs, annotate and export.

    Steps, in order:
      1. Read the configuration, the identifier remapping and the priority
         (exclusion) file, then collect the input samples.
      2. Abort with NotImplementedError after printing details for any
         sample whose "id" is None.
      3. Check samples against the configured fam file.
      4. Run run_illumina_prep over the retained samples in parallel with
         joblib and merge the resulting files.
      5. Run snpEff effects on the merged file, prepare a GEMINI database
         from it and print the database value.
      6. Write a "-noexclude" variant of the effects file via
         vcfutils.exclude_samples and pass it to prepare_plink_vcftools.

    cores is converted with int() and also stored as
    config["algorithm"]["num_cores"], replacing any existing "algorithm"
    section in config.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError
    check_fam(samples, config["runinfo"]["fam"])

    config["algorithm"] = {"num_cores": cores}
    samples = [
        s for s in samples if s["id"] is not None and s["id"] not in exclude
    ]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(
        joblib.delayed(run_illumina_prep)(s, config) for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({
        "vrn_file": merge_file,
        "sam_ref": config["ref"]["GRCh37"],
        "reference": {
            "fasta": {
                "base": config["ref"]["GRCh37"]
            }
        },
        "genome_resources": {
            "aliases": {
                "snpeff": "GRCh37.74"
            }
        },
        "genome_build": "GRCh37",
        "config": config
    })
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db(
        [os.path.join(os.getcwd(), effects_file)],
        [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
        [{
            "config": config,
            "work_bam": "yes",
            "genome_build": "GRCh37",
            "genome_resources": {
                "aliases": {
                    "human": True
                }
            }
        }], data)[0][1]["db"]
    print(gemini_db)
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file,
                                              exclude, config["ref"]["GRCh37"],
                                              config)
    prepare_plink_vcftools(noexclude_file, config)
Example #8
0
def variation_effects(vrn_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Builds the path to snpEff.jar under config["program"]["snpEff"] and
    forwards the optional "hybrid_target" and "java_memory" algorithm
    settings (None when unset) to snpeff_effects, returning its result.
    """
    jar_path = os.path.join(config["program"]["snpEff"], "snpEff.jar")
    algorithm = config["algorithm"]
    memory_opts = algorithm.get("java_memory")
    target_intervals = algorithm.get("hybrid_target")
    return snpeff_effects(jar_path, vrn_file, genome_build,
                          target_intervals, memory_opts)
Example #9
0
def main(dirname, config, cores):
    """Merge the VCF files found under dirname, annotate them and load GEMINI.

    The files located by find_vcf_files are prepped and merged, the merged
    file is run through snpEff effects with a fixed GRCh37 setup, and the
    result is handed to load_gemini_db together with config["ped"].
    """
    found = find_vcf_files(dirname)
    prepped = prep_vcf_files(found, cores, config)
    merged_file = merge_vcf_files(prepped, cores, config)

    snpeff_input = {
        "vrn_file": merged_file,
        "genome_resources": {"aliases": {"snpeff": "GRCh37"}},
        "genome_build": "GRCh37",
        "config": config,
    }
    effects_file = effects.snpeff_effects(snpeff_input)

    gemini_db = load_gemini_db(effects_file, config["ped"], cores)
Example #10
0
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    When data has a truthy "work_bam" and "vrn_file", the variant file is
    finalized with finalize_genotyper and then annotated with snpEff
    effects; a truthy annotation result replaces data["vrn_file"].
    Returns data wrapped as [[data]].
    """
    sample_name = str(data["name"])
    logger.info("Finalizing variant calls: %s" % sample_name)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    data["vrn_file"] = finalize_genotyper(data["vrn_file"], data["work_bam"],
                                          data["sam_ref"], data["config"])
    logger.info("Calculating variation effects for %s" % sample_name)
    annotated = effects.snpeff_effects(data)
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Example #11
0
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    When data has a truthy "work_bam" and "vrn_file", the variant file is
    filtered with variant_filtration (using the "variation" genome
    resources) and then annotated with snpEff effects; a truthy annotation
    result replaces data["vrn_file"]. Returns data wrapped as [[data]].
    """
    sample_name = str(data["name"])
    logger.info("Finalizing variant calls: %s" % sample_name)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    filtered = variant_filtration(data["vrn_file"], data["sam_ref"],
                                  data["genome_resources"]["variation"],
                                  data["config"])
    data["vrn_file"] = filtered
    logger.info("Calculating variation effects for %s" % sample_name)
    annotated = effects.snpeff_effects(data)
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Example #12
0
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    Does nothing unless the "snpcall" algorithm setting is truthy. In that
    case the variant file is finalized with finalize_genotyper and passed
    to snpEff effects together with the genome build and config; a truthy
    effects result replaces data["vrn_file"]. Returns [[data]].
    """
    if not data["config"]["algorithm"]["snpcall"]:
        return [[data]]

    sample_name = str(data["name"])
    logger.info("Finalizing variant calls: %s" % sample_name)
    data["vrn_file"] = finalize_genotyper(data["vrn_file"], data["work_bam"],
                                          data["sam_ref"], data["config"])
    logger.info("Calculating variation effects for %s" % sample_name)
    annotated = effects.snpeff_effects(data["vrn_file"], data["genome_build"],
                                       data["config"])
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Example #13
0
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    Requires a truthy "work_bam" and "vrn_file" in data to do any work:
    the variant file is finalized with finalize_genotyper, then snpEff
    effects are calculated and, when that yields a truthy result, stored
    back into data["vrn_file"]. Returns data wrapped as [[data]].
    """
    label = str(data["name"])
    logger.info("Finalizing variant calls: %s" % label)
    has_inputs = data["work_bam"] and data.get("vrn_file")
    if has_inputs:
        finalized = finalize_genotyper(data["vrn_file"], data["work_bam"],
                                       data["sam_ref"], data["config"])
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % label)
        effects_vcf = effects.snpeff_effects(data)
        if effects_vcf:
            data["vrn_file"] = effects_vcf
    return [[data]]
Example #14
0
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    When data has a truthy "work_bam" and "vrn_file", the variant file is
    filtered with variant_filtration using the files returned by
    configured_vrn_files, then annotated with snpEff effects; a truthy
    annotation result replaces data["vrn_file"]. Returns [[data]].
    """
    sample_name = str(data["name"])
    logger.info("Finalizing variant calls: %s" % sample_name)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    resource_files = configured_vrn_files(data["config"], data["sam_ref"])
    data["vrn_file"] = variant_filtration(data["vrn_file"], data["sam_ref"],
                                          resource_files, data["config"])
    logger.info("Calculating variation effects for %s" % sample_name)
    annotated = effects.snpeff_effects(data)
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Example #15
0
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    Log messages identify the sample by the last entry of data["name"] and
    the value from get_variantcaller. When data has a truthy "work_bam" and
    "vrn_file", the variant file is filtered with variant_filtration and
    then annotated with snpEff effects; a truthy annotation result replaces
    data["vrn_file"]. Returns data wrapped as [[data]].
    """
    sample_label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % sample_label)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    filtered = variant_filtration(data["vrn_file"], data["sam_ref"],
                                  data["genome_resources"]["variation"],
                                  data["config"])
    data["vrn_file"] = filtered
    logger.info("Calculating variation effects for %s" % sample_label)
    annotated = effects.snpeff_effects(data)
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Example #16
0
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    When data has a truthy "align_bam" and "vrn_file", snpEff effects are
    calculated first (a truthy result replaces data["vrn_file"]) and the
    resulting file is then run through variant_filtration. Returns data
    wrapped as [[data]].
    """
    sample_label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % sample_label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % sample_label)
    annotated = effects.snpeff_effects(data)
    if annotated:
        data["vrn_file"] = annotated

    logger.info("Filtering for %s" % sample_label)
    filtered = variant_filtration(data["vrn_file"], data["sam_ref"],
                                  data["genome_resources"]["variation"], data)
    data["vrn_file"] = filtered
    return [[data]]
Example #17
0
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    When data["work_bam"] is truthy, the variant file is finalized with
    finalize_genotyper and passed to snpEff effects along with the genome
    build and config; a truthy effects result replaces data["vrn_file"].
    Regardless of that branch, data is passed through
    validate.compare_to_rm and its result is returned wrapped as [[data]].
    """
    sample_name = str(data["name"])
    logger.info("Finalizing variant calls: %s" % sample_name)
    if data["work_bam"]:
        finalized = finalize_genotyper(data["vrn_file"], data["work_bam"],
                                       data["sam_ref"], data["config"])
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % sample_name)
        annotated = effects.snpeff_effects(finalized, data["genome_build"],
                                           data["config"])
        if annotated:
            data["vrn_file"] = annotated
    validated = validate.compare_to_rm(data)
    return [[validated]]
Example #18
0
def main(dirname, config, cores):
    """Find, prep and merge VCF files, then annotate and load into GEMINI.

    VCF files located under dirname are prepped and merged; the merged
    file is annotated through snpEff effects with a fixed GRCh37 setup and
    the effects file is passed to load_gemini_db with config["ped"].
    """
    merged_file = merge_vcf_files(
        prep_vcf_files(find_vcf_files(dirname), cores, config), cores, config)

    genome_resources = {"aliases": {"snpeff": "GRCh37"}}
    effects_file = effects.snpeff_effects({"vrn_file": merged_file,
                                           "genome_resources": genome_resources,
                                           "genome_build": "GRCh37",
                                           "config": config})

    gemini_db = load_gemini_db(effects_file, config["ped"], cores)
Example #19
0
def _run_ensemble_w_caller(batch_id, vrn_files, bam_files, base_dir, edata):
    """Run ensemble method using a variant caller to handle re-calling the inputs.

    Uses bcbio.variation.recall method plus an external variantcaller.

    The ensemble VCF is written to "<batch_id>-ensemble.vcf" in base_dir;
    the recall command is skipped when utils.file_exists reports that file
    as present. snpEff effects are then run on a deep copy of edata that
    points at the ensemble VCF. Returns a dictionary with "variantcaller",
    "vrn_file" and "bed_file" keys.
    """
    ensemble_vcf = os.path.join(base_dir, "{0}-ensemble.vcf".format(batch_id))
    if not utils.file_exists(ensemble_vcf):
        config = edata["config"]
        caller = config["algorithm"]["ensemble"]["caller"]
        recall_exe = config_utils.get_program("bcbio-variation-recall", config)
        cores_arg = "--cores=%s" % config["algorithm"].get("num_cores", 1)
        caller_arg = "--caller=%s" % caller
        base_args = [recall_exe, "ensemble", cores_arg, caller_arg,
                     ensemble_vcf, edata["sam_ref"]]
        cmd = base_args + vrn_files + bam_files
        do.run(cmd, "Ensemble calling with %s: %s" % (caller, batch_id))
    # Work on a copy so the caller's edata is left untouched.
    effects_input = copy.deepcopy(edata)
    effects_input["vrn_file"] = ensemble_vcf
    annotated_vcf = effects.snpeff_effects(effects_input)
    result = {"variantcaller": "ensemble"}
    result["vrn_file"] = annotated_vcf
    result["bed_file"] = None
    return result
Example #20
0
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    Only logs and returns when data lacks a truthy "align_bam" or
    "vrn_file". Otherwise the effects tool named by the "effects"
    algorithm setting (defaulting to "snpeff"; "vep" is also accepted) is
    run and its truthy result replaces data["vrn_file"]; a falsy setting
    skips the effects step. The file is then run through
    variant_filtration.

    Returns data wrapped as [[data]].
    Raises ValueError for an effects setting other than "snpeff" or "vep".
    """
    sample_label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % sample_label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % sample_label)
    effect_todo = tz.get_in(("config", "algorithm", "effects"), data, "snpeff")
    if effect_todo:
        if effect_todo == "snpeff":
            annotated = effects.snpeff_effects(data)
        elif effect_todo == "vep":
            annotated = effects.run_vep(data)
        else:
            message = "Unexpected variant effects configuration: %s" % effect_todo
            raise ValueError(message)
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % sample_label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation_resources = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(current_vcf, ref_file,
                                          variation_resources, data)
    return [[data]]
Example #21
0
def _run_ensemble_w_caller(batch_id, vrn_files, bam_files, base_dir, edata):
    """Run ensemble method using a variant caller to handle re-calling the inputs.

    Uses bcbio.variation.recall method plus an external variantcaller.

    Output goes to "<batch_id>-ensemble.vcf" under base_dir and the recall
    command only runs when utils.file_exists does not find that file.
    A deep copy of edata, pointed at the ensemble VCF, is then annotated
    with snpEff effects. Returns a dictionary holding the "ensemble"
    variantcaller label, the effects VCF and a None bed_file.
    """
    out_path = os.path.join(base_dir, "{0}-ensemble.vcf".format(batch_id))
    needs_run = not utils.file_exists(out_path)
    if needs_run:
        caller = edata["config"]["algorithm"]["ensemble"]["caller"]
        program = config_utils.get_program("bcbio-variation-recall",
                                           edata["config"])
        num_cores = edata["config"]["algorithm"].get("num_cores", 1)
        fixed_args = [program, "ensemble",
                      "--cores=%s" % num_cores,
                      "--caller=%s" % caller,
                      out_path, edata["sam_ref"]]
        description = "Ensemble calling with %s: %s" % (caller, batch_id)
        do.run(fixed_args + vrn_files + bam_files, description)
    # Copy so the vrn_file override does not leak into the caller's edata.
    annotate_data = copy.deepcopy(edata)
    annotate_data["vrn_file"] = out_path
    return {
        "variantcaller": "ensemble",
        "vrn_file": effects.snpeff_effects(annotate_data),
        "bed_file": None,
    }
def main(config_file, env, cores):
    """Prepare Illumina samples, merge their VCFs, annotate and export.

    Reads the configuration, identifier remapping and priority (exclusion)
    files, collects the input samples and aborts with NotImplementedError
    after printing details for any sample whose "id" is None. Retained
    samples are run through run_illumina_prep in parallel with joblib and
    the results are merged. The merged file is annotated through snpEff
    effects, loaded into a GEMINI database (whose value is printed), and a
    "-noexclude" variant produced by vcfutils.exclude_samples is passed to
    prepare_plink_vcftools.

    cores is converted with int() and also stored as
    config["algorithm"]["num_cores"], replacing any existing "algorithm"
    section in config.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError
    check_fam(samples, config["runinfo"]["fam"])

    config["algorithm"] = {"num_cores": cores}
    samples = [s for s in samples if s["id"] is not None and s["id"] not in exclude]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(joblib.delayed(run_illumina_prep)(s, config)
                                            for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({"vrn_file": merge_file,
                                           "sam_ref": config["ref"]["GRCh37"],
                                           "reference": {"fasta": {"base": config["ref"]["GRCh37"]}},
                                           "genome_resources": {"aliases": {"snpeff": "GRCh37.74"}},
                                           "genome_build": "GRCh37",
                                           "config": config})
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db([os.path.join(os.getcwd(), effects_file)],
                                          [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
                                          [{"config": config, "work_bam": "yes", "genome_build": "GRCh37",
                                            "genome_resources": {"aliases": {"human": True}}}],
                                          data)[0][1]["db"]
    print(gemini_db)
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file, exclude,
                                              config["ref"]["GRCh37"], config)
    prepare_plink_vcftools(noexclude_file, config)
Example #23
0
 def __call__(self, in_file):
     """Run snpEff effects on in_file, bracketed by start and end messages.

     Uses the genome and config stored on the instance and returns the
     value produced by effects.snpeff_effects.
     """
     self._start_message(in_file)
     effects_result = effects.snpeff_effects(in_file, self.genome, self.config)
     self._end_message(in_file)
     return effects_result
Example #24
0
 def __call__(self, in_file):
     """Annotate in_file with snpEff effects and return the result.

     Start and end messages are emitted around the effects call, which
     receives the instance's genome and config.
     """
     self._start_message(in_file)
     annotated = effects.snpeff_effects(in_file, self.genome, self.config)
     self._end_message(in_file)
     return annotated