コード例 #1
0
def main(snpeff_jar, vcf_ref, genome, interval_file=None):
    """Run ``snpeff_effects`` on one VCF file or on a directory of them.

    When ``vcf_ref`` is a directory, every entry in it matching
    ``*-snp-filter.vcf`` is processed in sorted order; otherwise ``vcf_ref``
    itself is treated as the single input. Each input is handed to
    ``snpeff_effects`` together with the jar, the genome and the optional
    interval file.
    """
    if not os.path.isdir(vcf_ref):
        inputs = [vcf_ref]
    else:
        pattern = os.path.join(vcf_ref, "*-snp-filter.vcf")
        inputs = sorted(glob.glob(pattern))
    for cur_vcf in inputs:
        snpeff_effects(snpeff_jar, cur_vcf, genome, interval_file)
コード例 #2
0
ファイル: variation.py プロジェクト: rwness/bcbb
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Calls ``snpeff_effects`` to obtain two outputs (a VCF and a text result,
    going by the names used here). When the VCF is truthy it is passed, along
    with the original variant file, to ``annotate_effects``.

    Returns a tuple ``(annotated_vcf, snpeff_txt)``; ``annotated_vcf`` is
    ``None`` when ``snpeff_effects`` yielded no VCF.
    """
    effects_vcf, effects_txt = snpeff_effects(vrn_file, genome_build, config)
    if effects_vcf:
        annotated = annotate_effects(vrn_file, effects_vcf, genome_file, config)
    else:
        annotated = None
    return annotated, effects_txt
コード例 #3
0
ファイル: variation.py プロジェクト: jme9/wabio
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Delegates to ``snpeff_effects`` and returns its result unchanged. The
    GATK annotator is no longer run, since it requires an old version of
    snpEff; ``genome_file`` is therefore not used in this function.
    """
    effects_result = snpeff_effects(vrn_file, genome_build, config)
    return effects_result
コード例 #4
0
def postprocess_variants(data):
    """Post-process variant calls: effects annotation, filtering, prioritization.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries. In that case ``data["vrn_file"]`` is replaced, in order, by the
    annotated file (when the selected effects tool returns a truthy value),
    by the output of ``variant_filtration`` and by the output of
    ``prioritize.handle_vcf_calls``.

    Returns:
        ``[[data]]``, with ``data`` updated in place.

    Raises:
        ValueError: if ``effects.get_type`` returns a truthy value other than
            ``"snpeff"`` or ``"vep"``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    effect_todo = effects.get_type(data)
    # A falsy effects type skips annotation entirely.
    if effect_todo:
        if effect_todo == "snpeff":
            annotate = effects.snpeff_effects
        elif effect_todo == "vep":
            annotate = effects.run_vep
        else:
            raise ValueError(
                "Unexpected variant effects configuration: %s" % effect_todo)
        annotated = annotate(data)
        # Keep the current file when the tool produced nothing.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    data["vrn_file"] = variant_filtration(
        data["vrn_file"],
        data["sam_ref"],
        tz.get_in(("genome_resources", "variation"), data, {}),
        data)

    logger.info("Prioritization for %s" % label)
    data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)
    return [[data]]
コード例 #5
0
ファイル: variation.py プロジェクト: mcicdata/bcbb
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Runs ``snpeff_effects`` first; only when it returns a truthy first
    element is ``annotate_effects`` called with the original variant file,
    that element, the genome file and the configuration.

    Returns a two-item tuple: the ``annotate_effects`` result (or ``None``
    when annotation was skipped) and the second element returned by
    ``snpeff_effects``.
    """
    snpeff_out = snpeff_effects(vrn_file, genome_build, config)
    effects_vcf, effects_txt = snpeff_out
    annotated = None
    if effects_vcf:
        annotated = annotate_effects(vrn_file, effects_vcf, genome_file, config)
    return annotated, effects_txt
コード例 #6
0
ファイル: variation.py プロジェクト: 16NWallace/bcbb
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Runs snpEff through ``snpeff_effects`` and hands back whatever it
    returns. The GATK annotator is no longer run, since it requires an old
    version of snpEff, so ``genome_file`` is not used in this function.
    """
    snpeff_result = snpeff_effects(vrn_file, genome_build, config)
    return snpeff_result
コード例 #7
0
def main(config_file, env, cores):
    """Prepare, merge, annotate and export variant calls for input samples.

    Steps, in order:
      1. Read the configuration, the identifier remapping and the priority
         list (used here as the set of identifiers to exclude), then collect
         the input samples.
      2. Abort if any sample has no identifier; check the samples against
         the configured fam file.
      3. Run ``run_illumina_prep`` on the retained samples in parallel with
         joblib and merge the per-sample outputs.
      4. Annotate the merged file with ``effects.snpeff_effects``, call
         ``population.prep_gemini_db`` and print the resulting ``db`` entry.
      5. Pass the annotated file through ``vcfutils.exclude_samples`` and
         hand the result to ``prepare_plink_vcftools``.

    Args:
        config_file: Configuration file, passed to ``read_config``.
        env: Environment selector, passed to ``read_config``.
        cores: Number of parallel jobs; converted with ``int``.

    Raises:
        NotImplementedError: if any input sample has an ``id`` of ``None``.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or called as a Python 3 function.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError("Input samples with missing identifiers")
    check_fam(samples, config["runinfo"]["fam"])

    # Overwrites any existing "algorithm" section of the configuration.
    config["algorithm"] = {"num_cores": cores}
    samples = [
        s for s in samples if s["id"] is not None and s["id"] not in exclude
    ]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(
        joblib.delayed(run_illumina_prep)(s, config) for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    ref_file = config["ref"]["GRCh37"]
    effects_file = effects.snpeff_effects({
        "vrn_file": merge_file,
        "sam_ref": ref_file,
        "reference": {
            "fasta": {
                "base": ref_file
            }
        },
        "genome_resources": {
            "aliases": {
                "snpeff": "GRCh37.74"
            }
        },
        "genome_build": "GRCh37",
        "config": config
    })
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db(
        [os.path.join(os.getcwd(), effects_file)],
        [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
        [{
            "config": config,
            "work_bam": "yes",
            "genome_build": "GRCh37",
            "genome_resources": {
                "aliases": {
                    "human": True
                }
            }
        }], data)[0][1]["db"]
    print(gemini_db)
    # splitext_plus returns the (base, extension) pair that fills both slots.
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file,
                                              exclude, config["ref"]["GRCh37"],
                                              config)
    prepare_plink_vcftools(noexclude_file, config)
コード例 #8
0
ファイル: variation.py プロジェクト: hussius/bcbb
def variation_effects(vrn_file, genome_build, config):
    """Calculate effects of variations, associating them with transcripts.

    Builds the path to ``snpEff.jar`` under ``config["program"]["snpEff"]``
    and reads the optional ``hybrid_target`` and ``java_memory`` settings
    from ``config["algorithm"]`` (each ``None`` when absent), then returns
    the result of ``snpeff_effects`` called with those values.
    """
    snpeff_jar = os.path.join(config["program"]["snpEff"], "snpEff.jar")
    algorithm = config["algorithm"]
    java_memory = algorithm.get("java_memory", None)
    hybrid_target = algorithm.get("hybrid_target", None)
    return snpeff_effects(snpeff_jar, vrn_file, genome_build, hybrid_target,
                          java_memory)
コード例 #9
0
ファイル: prep_adni_vcfs.py プロジェクト: Xiuying/projects
def main(dirname, config, cores):
    """Prepare, merge and annotate the VCF files found for ``dirname``.

    Chains ``find_vcf_files`` -> ``prep_vcf_files`` -> ``merge_vcf_files``,
    runs ``effects.snpeff_effects`` on the merged file with a GRCh37 build
    and snpEff alias, and finally calls ``load_gemini_db`` with the effects
    file, ``config["ped"]`` and ``cores``. Nothing is returned.
    """
    inputs = find_vcf_files(dirname)
    prepped = prep_vcf_files(inputs, cores, config)
    merged = merge_vcf_files(prepped, cores, config)
    snpeff_input = {
        "vrn_file": merged,
        "genome_resources": {"aliases": {"snpeff": "GRCh37"}},
        "genome_build": "GRCh37",
        "config": config,
    }
    annotated = effects.snpeff_effects(snpeff_input)

    gemini_db = load_gemini_db(annotated, config["ped"], cores)
コード例 #10
0
ファイル: variation.py プロジェクト: dargorr/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    When ``data["work_bam"]`` and ``data.get("vrn_file")`` are both truthy,
    ``data["vrn_file"]`` is replaced by the output of ``finalize_genotyper``
    and then, if ``effects.snpeff_effects`` returns a truthy value, by that
    annotated file.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    data["vrn_file"] = finalize_genotyper(
        data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # Keep the finalized file when annotation produced nothing.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
コード例 #11
0
ファイル: variation.py プロジェクト: yangjl/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    When ``data["work_bam"]`` and ``data.get("vrn_file")`` are both truthy,
    ``data["vrn_file"]`` is replaced by the output of ``variant_filtration``
    (given the reference, ``data["genome_resources"]["variation"]`` and the
    configuration) and then, if ``effects.snpeff_effects`` returns a truthy
    value, by that annotated file.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    filtered = variant_filtration(data["vrn_file"], data["sam_ref"],
                                  data["genome_resources"]["variation"],
                                  data["config"])
    data["vrn_file"] = filtered
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # Keep the filtered file when annotation produced nothing.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
コード例 #12
0
ファイル: sample.py プロジェクト: luwening/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    Does nothing unless ``data["config"]["algorithm"]["snpcall"]`` is truthy.
    Otherwise ``data["vrn_file"]`` is replaced by the output of
    ``finalize_genotyper`` and then, if ``effects.snpeff_effects`` (called
    with the variant file, genome build and configuration) returns a truthy
    value, by that annotated file.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    if not data["config"]["algorithm"]["snpcall"]:
        return [[data]]

    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    data["vrn_file"] = finalize_genotyper(
        data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(
        data["vrn_file"], data["genome_build"], data["config"])
    # Keep the finalized file when annotation produced nothing.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
コード例 #13
0
ファイル: variation.py プロジェクト: dargorr/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    Requires a truthy ``data["work_bam"]`` and a truthy ``vrn_file`` entry;
    otherwise ``data`` is returned untouched. When both are present, the
    variant file is run through ``finalize_genotyper`` and the result stored
    back in ``data["vrn_file"]``; a truthy result from
    ``effects.snpeff_effects`` then replaces it again.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    has_inputs = data["work_bam"] and data.get("vrn_file")
    if has_inputs:
        finalized = finalize_genotyper(data["vrn_file"], data["work_bam"],
                                       data["sam_ref"], data["config"])
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % str(data["name"]))
        effects_vcf = effects.snpeff_effects(data)
        if effects_vcf:
            data["vrn_file"] = effects_vcf
    return [[data]]
コード例 #14
0
ファイル: variation.py プロジェクト: vezzi/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    When ``data["work_bam"]`` and ``data.get("vrn_file")`` are both truthy,
    the value returned by ``configured_vrn_files`` is passed to
    ``variant_filtration``, whose output replaces ``data["vrn_file"]``. A
    truthy result from ``effects.snpeff_effects`` then replaces it again.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    resource_files = configured_vrn_files(data["config"], data["sam_ref"])
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], resource_files, data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # Keep the filtered file when annotation produced nothing.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
コード例 #15
0
ファイル: variation.py プロジェクト: brentp/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls: filtering and effects annotation.

    Log messages identify the work by the last entry of ``data["name"]`` and
    the result of ``get_variantcaller``. When ``data["work_bam"]`` and
    ``data.get("vrn_file")`` are both truthy, ``data["vrn_file"]`` is
    replaced by the output of ``variant_filtration`` and then, if
    ``effects.snpeff_effects`` returns a truthy value, by that file.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    data["vrn_file"] = variant_filtration(
        data["vrn_file"],
        data["sam_ref"],
        data["genome_resources"]["variation"],
        data["config"])
    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.snpeff_effects(data)
    # Keep the filtered file when annotation produced nothing.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
コード例 #16
0
def postprocess_variants(data):
    """Provide post-processing of variant calls: effects annotation, then filtering.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries. ``effects.snpeff_effects`` runs first and, when it returns a
    truthy value, replaces ``data["vrn_file"]``; the current variant file is
    then passed through ``variant_filtration``.

    Returns:
        ``[[data]]``, with ``data`` updated in place.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.snpeff_effects(data)
    # Filtering works on the unannotated file when annotation produced nothing.
    if annotated:
        data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    data["vrn_file"] = variant_filtration(
        data["vrn_file"],
        data["sam_ref"],
        data["genome_resources"]["variation"],
        data)
    return [[data]]
コード例 #17
0
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    When ``data["work_bam"]`` is truthy, ``data["vrn_file"]`` is replaced by
    the output of ``finalize_genotyper`` and then, if
    ``effects.snpeff_effects`` (called with the variant file, genome build
    and configuration) returns a truthy value, by that file. In every case
    ``data`` is then passed through ``validate.compare_to_rm``.

    Returns:
        ``[[data]]``, where ``data`` is the value returned by
        ``validate.compare_to_rm``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if data["work_bam"]:
        finalized = finalize_genotyper(
            data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % str(data["name"]))
        annotated = effects.snpeff_effects(
            data["vrn_file"], data["genome_build"], data["config"])
        # Keep the finalized file when annotation produced nothing.
        if annotated:
            data["vrn_file"] = annotated
    validated = validate.compare_to_rm(data)
    return [[validated]]
コード例 #18
0
def main(dirname, config, cores):
    """Find, prepare, merge and annotate VCF files, then load them.

    The files reported by ``find_vcf_files`` for ``dirname`` are prepared
    with ``prep_vcf_files`` and merged with ``merge_vcf_files``. The merged
    file is annotated through ``effects.snpeff_effects`` using GRCh37 for
    both the build and the snpEff alias, and the annotated file is passed to
    ``load_gemini_db`` along with ``config["ped"]`` and ``cores``. Nothing is
    returned.
    """
    merged = merge_vcf_files(
        prep_vcf_files(find_vcf_files(dirname), cores, config),
        cores, config)
    build = "GRCh37"
    annotated = effects.snpeff_effects({
        "vrn_file": merged,
        "genome_resources": {"aliases": {"snpeff": build}},
        "genome_build": build,
        "config": config,
    })

    gemini_db = load_gemini_db(annotated, config["ped"], cores)
コード例 #19
0
ファイル: ensemble.py プロジェクト: snewhouse/bcbio-nextgen
def _run_ensemble_w_caller(batch_id, vrn_files, bam_files, base_dir, edata):
    """Run ensemble method using a variant caller to handle re-calling the inputs.

    Uses bcbio.variation.recall method plus an external variantcaller.

    The ensemble VCF is written to ``<base_dir>/<batch_id>-ensemble.vcf``;
    the recall command is skipped when ``utils.file_exists`` reports that
    file is already there. The VCF is then annotated with
    ``effects.snpeff_effects`` on a deep copy of ``edata``, so ``edata``
    itself is not modified.

    Args:
        batch_id: Identifier used in the output file name and log message.
        vrn_files: List of input variant files appended to the command.
        bam_files: List of input BAM files appended to the command.
        base_dir: Directory in which the ensemble VCF is written.
        edata: Sample data providing ``config`` and ``sam_ref``.

    Returns:
        dict with ``variantcaller`` set to ``"ensemble"``, ``vrn_file`` set
        to the annotated VCF (or the unannotated ensemble VCF when
        annotation returns nothing) and ``bed_file`` set to ``None``.
    """
    out_vcf_file = os.path.join(base_dir, "{0}-ensemble.vcf".format(batch_id))
    if not utils.file_exists(out_vcf_file):
        caller = edata["config"]["algorithm"]["ensemble"]["caller"]
        cmd = [config_utils.get_program("bcbio-variation-recall", edata["config"]),
               "ensemble", "--cores=%s" % edata["config"]["algorithm"].get("num_cores", 1),
               "--caller=%s" % caller,
               out_vcf_file, edata["sam_ref"]] + vrn_files + bam_files
        do.run(cmd, "Ensemble calling with %s: %s" % (caller, batch_id))
    in_data = copy.deepcopy(edata)
    in_data["vrn_file"] = out_vcf_file
    effects_vcf = effects.snpeff_effects(in_data)
    # Other callers of snpeff_effects treat a falsy result as "no annotated
    # file"; fall back to the ensemble VCF so the calls are not lost.
    return {"variantcaller": "ensemble",
            "vrn_file": effects_vcf if effects_vcf else out_vcf_file,
            "bed_file": None}
コード例 #20
0
ファイル: variation.py プロジェクト: GetBen/bcbio-nextgen
def postprocess_variants(data):
    """Provide post-processing of variant calls: effects annotation, then filtering.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries. The effects tool is read from ``config -> algorithm -> effects``
    (defaulting to ``"snpeff"``); a falsy setting skips annotation. A truthy
    annotation result replaces ``data["vrn_file"]``, which is then passed
    through ``variant_filtration``.

    Returns:
        ``[[data]]``, with ``data`` updated in place.

    Raises:
        ValueError: if the configured effects tool is truthy but is neither
            ``"snpeff"`` nor ``"vep"``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    effect_todo = tz.get_in(("config", "algorithm", "effects"), data, "snpeff")
    if effect_todo:
        if effect_todo == "snpeff":
            annotate = effects.snpeff_effects
        elif effect_todo == "vep":
            annotate = effects.run_vep
        else:
            raise ValueError("Unexpected variant effects configuration: %s" % effect_todo)
        annotated = annotate(data)
        # Keep the current file when the tool produced nothing.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    data["vrn_file"] = variant_filtration(
        data["vrn_file"],
        data["sam_ref"],
        tz.get_in(("genome_resources", "variation"), data, {}),
        data)
    return [[data]]
コード例 #21
0
ファイル: ensemble.py プロジェクト: senthil10/bcbio-nextgen
def _run_ensemble_w_caller(batch_id, vrn_files, bam_files, base_dir, edata):
    """Run ensemble method using a variant caller to handle re-calling the inputs.

    Uses bcbio.variation.recall method plus an external variantcaller.

    The recall command writes ``<base_dir>/<batch_id>-ensemble.vcf`` and is
    only run when ``utils.file_exists`` says that file is missing. The VCF
    is then annotated with ``effects.snpeff_effects`` using a deep copy of
    ``edata``, leaving ``edata`` unmodified.

    Args:
        batch_id: Identifier used in the output file name and log message.
        vrn_files: List of input variant files appended to the command.
        bam_files: List of input BAM files appended to the command.
        base_dir: Directory in which the ensemble VCF is written.
        edata: Sample data providing ``config`` and ``sam_ref``.

    Returns:
        dict with ``variantcaller`` set to ``"ensemble"``, ``vrn_file`` set
        to the annotated VCF (or the unannotated ensemble VCF when
        annotation returns nothing) and ``bed_file`` set to ``None``.
    """
    out_vcf_file = os.path.join(base_dir, "{0}-ensemble.vcf".format(batch_id))
    if not utils.file_exists(out_vcf_file):
        caller = edata["config"]["algorithm"]["ensemble"]["caller"]
        cmd = [
            config_utils.get_program("bcbio-variation-recall",
                                     edata["config"]), "ensemble",
            "--cores=%s" % edata["config"]["algorithm"].get("num_cores", 1),
            "--caller=%s" % caller, out_vcf_file, edata["sam_ref"]
        ] + vrn_files + bam_files
        do.run(cmd, "Ensemble calling with %s: %s" % (caller, batch_id))
    in_data = copy.deepcopy(edata)
    in_data["vrn_file"] = out_vcf_file
    effects_vcf = effects.snpeff_effects(in_data)
    # Other callers of snpeff_effects treat a falsy result as "no annotated
    # file"; fall back to the ensemble VCF so the calls are not lost.
    return {
        "variantcaller": "ensemble",
        "vrn_file": effects_vcf if effects_vcf else out_vcf_file,
        "bed_file": None
    }
コード例 #22
0
def main(config_file, env, cores):
    """Prepare, merge, annotate and export variant calls for input samples.

    Reads the configuration, the identifier remapping and the priority list
    (used here as the set of identifiers to exclude), and collects the input
    samples. After checking identifiers and the fam file, the retained
    samples are run through ``run_illumina_prep`` in parallel with joblib
    and the outputs merged. The merged file is annotated with
    ``effects.snpeff_effects`` and passed to ``population.prep_gemini_db``,
    whose ``db`` entry is printed. Finally the annotated file goes through
    ``vcfutils.exclude_samples`` and then ``prepare_plink_vcftools``.

    Args:
        config_file: Configuration file, passed to ``read_config``.
        env: Environment selector, passed to ``read_config``.
        cores: Number of parallel jobs; converted with ``int``.

    Raises:
        NotImplementedError: if any input sample has an ``id`` of ``None``.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or called as a Python 3 function.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError("Input samples with missing identifiers")
    check_fam(samples, config["runinfo"]["fam"])

    # Overwrites any existing "algorithm" section of the configuration.
    config["algorithm"] = {"num_cores": cores}
    samples = [s for s in samples if s["id"] is not None and s["id"] not in exclude]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(joblib.delayed(run_illumina_prep)(s, config)
                                            for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    ref_file = config["ref"]["GRCh37"]
    effects_file = effects.snpeff_effects({"vrn_file": merge_file,
                                           "sam_ref": ref_file,
                                           "reference": {"fasta": {"base": ref_file}},
                                           "genome_resources": {"aliases": {"snpeff": "GRCh37.74"}},
                                           "genome_build": "GRCh37",
                                           "config": config})
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db([os.path.join(os.getcwd(), effects_file)],
                                          [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
                                          [{"config": config, "work_bam": "yes", "genome_build": "GRCh37",
                                            "genome_resources": {"aliases": {"human": True}}}],
                                          data)[0][1]["db"]
    print(gemini_db)
    # splitext_plus returns the (base, extension) pair that fills both slots.
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file, exclude,
                                              config["ref"]["GRCh37"], config)
    prepare_plink_vcftools(noexclude_file, config)
コード例 #23
0
ファイル: stages.py プロジェクト: anindya028/bipy
 def __call__(self, in_file):
     """Annotate ``in_file`` with ``effects.snpeff_effects``.

     Emits the start message, runs the annotation with this object's
     ``genome`` and ``config``, emits the end message and returns the
     annotation result.
     """
     self._start_message(in_file)
     annotated = effects.snpeff_effects(in_file, self.genome, self.config)
     self._end_message(in_file)
     return annotated
コード例 #24
0
 def __call__(self, in_file):
     """Run ``effects.snpeff_effects`` on ``in_file`` and return its result.

     ``_start_message`` is called before the run and ``_end_message`` after
     it, both with ``in_file``; the genome and configuration come from
     ``self.genome`` and ``self.config``.
     """
     self._start_message(in_file)
     result = effects.snpeff_effects(in_file, self.genome, self.config)
     self._end_message(in_file)
     return result