def add_reference_resources(data):
    """Attach genome reference details to the sample dictionary `data`.

    Fills in `reference`, the legacy `sam_ref` key, `genome_resources` and
    the snpEff files under `reference`. When the algorithm configuration
    sets `validate_genome_build`, the FASTA entry of that build is stored
    under `reference["alt"]`. Returns the updated `data`.
    """
    algorithm = data["config"]["algorithm"]
    aligner = algorithm.get("aligner", None)
    data["reference"] = genome.get_refs(
        data["genome_build"], aligner, data["dirs"]["galaxy"])

    fasta_base_key = ("reference", "fasta", "base")
    # `sam_ref` is retained for callers that predate the `reference` entry
    data["sam_ref"] = utils.get_in(data, fasta_base_key)

    # The reference FASTA location is handed over as the fallback argument
    # of the species directory lookup.
    fasta_fallback = utils.get_in(data, fasta_base_key)
    ref_loc = utils.get_in(
        data, ("config", "resources", "species", "dir"), fasta_fallback)
    data["genome_resources"] = genome.get_resources(data["genome_build"], ref_loc)
    data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    alt_genome = utils.get_in(
        data, ("config", "algorithm", "validate_genome_build"))
    if alt_genome:
        alt_refs = genome.get_refs(alt_genome, None, data["dirs"]["galaxy"])
        data["reference"]["alt"] = {alt_genome: alt_refs["fasta"]}

    # Deliberately disabled: re-enable once the gemini configuration
    # directory can be re-defined.
    if False:
        if population.do_db_build([data], check_gemini=False, need_bam=False):
            data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
def add_reference_resources(data, remote_retriever=None):
    """Attach genome reference details to the sample dictionary `data`.

    When `remote_retriever` is given, references and resources are obtained
    through its `get_refs` and `get_resources` methods; otherwise the
    `genome` module is used. Also fills in the legacy `sam_ref` key, snpEff
    files (when the effects type is "snpeff" and none are present yet) and
    the validation and prioritization targets. Returns the updated `data`.
    """
    algorithm = data["config"]["algorithm"]
    aligner = algorithm.get("aligner", None)
    if remote_retriever:
        data["reference"] = remote_retriever.get_refs(
            data["genome_build"], aligner, data["config"])
    else:
        data["reference"] = genome.get_refs(
            data["genome_build"], aligner, data["dirs"]["galaxy"], data)
        # NOTE(review): layout was rebuilt from a single collapsed line; the
        # file check is assumed to belong to this non-remote branch - confirm.
        _check_ref_files(data["reference"], data)

    fasta_base_key = ("reference", "fasta", "base")
    # `sam_ref` is retained for callers that predate the `reference` entry
    data["sam_ref"] = utils.get_in(data, fasta_base_key)

    # The reference FASTA location is handed over as the fallback argument
    # of the species directory lookup.
    fasta_fallback = utils.get_in(data, fasta_base_key)
    ref_loc = utils.get_in(
        data, ("config", "resources", "species", "dir"), fasta_fallback)

    if remote_retriever:
        # The retriever hands back the whole sample dictionary
        data = remote_retriever.get_resources(data["genome_build"], ref_loc, data)
    else:
        data["genome_resources"] = genome.get_resources(
            data["genome_build"], ref_loc, data)

    if effects.get_type(data) == "snpeff":
        if "snpeff" not in data["reference"]:
            data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    data = _fill_prioritization_targets(_fill_validation_targets(data))

    # Deliberately disabled: re-enable once the gemini configuration
    # directory can be re-defined.
    if False:
        if population.do_db_build([data], need_bam=False):
            data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
def add_reference_resources(data, remote_retriever=None):
    """Add genome reference information to the sample dictionary `data`.

    References and genome resources come from `remote_retriever` when one
    is supplied and from the `genome` module otherwise. The function also
    sets the legacy `sam_ref` key, adds snpEff files when the effects type
    is "snpeff" and they are not already present, and runs the validation
    and prioritization target helpers. Returns the resulting `data`.
    """
    use_remote = bool(remote_retriever)
    aligner = data["config"]["algorithm"].get("aligner", None)

    if use_remote:
        references = remote_retriever.get_refs(
            data["genome_build"], aligner, data["config"])
        data["reference"] = references
    else:
        references = genome.get_refs(
            data["genome_build"], aligner, data["dirs"]["galaxy"], data)
        data["reference"] = references
        # NOTE(review): layout was rebuilt from a single collapsed line; the
        # file check is assumed to run only for locally resolved references.
        _check_ref_files(data["reference"], data)

    # Legacy `sam_ref` key, kept for backwards compatibility
    data["sam_ref"] = utils.get_in(data, ("reference", "fasta", "base"))

    # Species directory lookup, with the reference FASTA location passed as
    # the fallback argument.
    species_dir_key = ("config", "resources", "species", "dir")
    ref_loc = utils.get_in(
        data, species_dir_key,
        utils.get_in(data, ("reference", "fasta", "base")))

    if use_remote:
        data = remote_retriever.get_resources(data["genome_build"], ref_loc, data)
    else:
        resources = genome.get_resources(data["genome_build"], ref_loc, data)
        data["genome_resources"] = resources

    wants_snpeff = effects.get_type(data) == "snpeff"
    if wants_snpeff and "snpeff" not in data["reference"]:
        data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    data = _fill_validation_targets(data)
    data = _fill_prioritization_targets(data)

    # Switched off on purpose until the gemini configuration directory can
    # be re-defined.
    if False:
        if population.do_db_build([data], need_bam=False):
            data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
def _create_validate_config(vrn_file, rm_file, rm_interval_file, rm_genome,
                            base_dir, data):
    """Build the bcbio.variation configuration dictionary used for validation.

    The result holds a `dir` section rooted at `base_dir` and a single
    grading experiment comparing the reference call set (`rm_file`) against
    the evaluation calls (`vrn_file`). When `rm_genome` resolves to a genome
    different from `data["sam_ref"]`, the evaluation call carries its own
    `ref`, preparation flags and intervals; otherwise intervals and the
    callable BAM are attached to the experiment itself.
    """
    if rm_genome:
        grading_genome = genome.get_refs(
            rm_genome, None, data["dirs"]["galaxy"])[-1]
        # Only track a separate evaluation genome when the two differ
        if grading_genome != data["sam_ref"]:
            eval_genome = data["sam_ref"]
        else:
            eval_genome = None
    else:
        eval_genome = None
        grading_genome = data["sam_ref"]

    ref_call = {
        "file": str(rm_file),
        "name": "ref",
        "type": "grading-ref",
        "preclean": True,
        "prep": True,
        "remove-refcalls": True,
    }
    a_intervals = get_analysis_intervals(data)
    if rm_interval_file:
        ref_call["intervals"] = rm_interval_file

    eval_call = {"file": vrn_file, "name": "eval", "remove-refcalls": True}
    if eval_genome:
        # NOTE(review): layout was rebuilt from a single collapsed line; the
        # preclean/prep flags are assumed to apply only with an eval genome.
        eval_call["ref"] = eval_genome
        eval_call["preclean"] = True
        eval_call["prep"] = True
        if a_intervals:
            eval_call["intervals"] = os.path.abspath(a_intervals)

    exp = {
        "sample": data["name"][-1],
        "ref": grading_genome,
        "approach": "grade",
        "calls": [ref_call, eval_call],
    }
    if not eval_genome:
        # Without a separate evaluation genome, intervals and alignments
        # are attached at the experiment level.
        if a_intervals:
            exp["intervals"] = os.path.abspath(a_intervals)
        if data.get("callable_bam"):
            exp["align"] = data["callable_bam"]

    dirs = {"base": base_dir, "out": "work", "prep": "work/prep"}
    return {"dir": dirs, "experiments": [exp]}
def _add_reference_resources(data):
    """Attach genome reference details to the sample dictionary `data`.

    Stores the two values returned by `genome.get_refs` as `align_ref` and
    `sam_ref`, then looks up `genome_resources` for the build using
    `sam_ref`. Returns the updated `data`.
    """
    algorithm = data["config"]["algorithm"]
    aligner = algorithm.get("aligner", None)
    build = data["genome_build"]
    refs = genome.get_refs(build, aligner, data["dirs"]["galaxy"])
    # `get_refs` is unpacked as an (alignment reference, sam reference) pair
    data["align_ref"], data["sam_ref"] = refs
    data["genome_resources"] = genome.get_resources(build, data["sam_ref"])
    return data
def add_reference_resources(data):
    """Add genome reference information to the sample dictionary `data`.

    Sets `reference`, the legacy `sam_ref` key, `genome_resources` and the
    snpEff files. If `validate_genome_build` is present in the algorithm
    configuration, the FASTA entry for that build is recorded under
    `reference["alt"]`. Returns the updated `data`.
    """
    aligner = data["config"]["algorithm"].get("aligner", None)
    references = genome.get_refs(
        data["genome_build"], aligner, data["dirs"]["galaxy"])
    data["reference"] = references

    # Legacy `sam_ref` key, kept for backwards compatibility
    data["sam_ref"] = utils.get_in(data, ("reference", "fasta", "base"))

    # Species directory lookup, with the reference FASTA location passed as
    # the fallback argument.
    species_dir_key = ("config", "resources", "species", "dir")
    ref_loc = utils.get_in(
        data, species_dir_key,
        utils.get_in(data, ("reference", "fasta", "base")))
    resources = genome.get_resources(data["genome_build"], ref_loc)
    data["genome_resources"] = resources

    snpeff_files = effects.get_snpeff_files(data)
    data["reference"]["snpeff"] = snpeff_files

    validate_key = ("config", "algorithm", "validate_genome_build")
    alt_genome = utils.get_in(data, validate_key)
    if alt_genome:
        alt_fasta = genome.get_refs(
            alt_genome, None, data["dirs"]["galaxy"])["fasta"]
        data["reference"]["alt"] = {alt_genome: alt_fasta}

    # Switched off on purpose until the gemini configuration directory can
    # be re-defined.
    if False:
        if population.do_db_build([data], check_gemini=False, need_bam=False):
            data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
def get_genome_dir(gid, galaxy_dir, data):
    """Locate the directory holding the genome build `gid`.

    With a `galaxy_dir`, the references for `gid` are looked up and the
    directory two levels above the FASTA base file is returned, provided
    that file exists. Without one, `genomes/*/<gid>` is globbed under the
    data directory and the match is returned only when it is unique and
    exists. Returns None when no directory is found.
    """
    # NOTE(review): layout was rebuilt from a single collapsed line; the glob
    # fallback is assumed to be the alternative to `galaxy_dir` being set.
    if not galaxy_dir:
        pattern = os.path.join(_get_data_dir(), "genomes", "*", gid)
        matches = glob.glob(pattern)
        if len(matches) == 1 and os.path.exists(matches[0]):
            return matches[0]
        return None

    refs = genome.get_refs(gid, None, galaxy_dir, data)
    seq_file = tz.get_in(["fasta", "base"], refs)
    if seq_file and os.path.exists(seq_file):
        fasta_dir = os.path.dirname(seq_file)
        return os.path.dirname(fasta_dir)
    return None
def _create_validate_config(vrn_file, rm_file, rm_interval_file, rm_genome,
                            base_dir, data):
    """Build the bcbio.variation configuration dictionary used for validation.

    The result holds a `dir` section rooted at `base_dir` and a single
    grading experiment comparing the reference call set (`rm_file`) against
    the evaluation calls (`vrn_file`). The evaluation call gets its own
    `ref` and preparation flags when `rm_genome` resolves to a genome other
    than `data["sam_ref"]`. The callable BAM and analysis intervals are
    added to the experiment when available.
    """
    if rm_genome:
        grading_genome = genome.get_refs(
            rm_genome, None, data["dirs"]["galaxy"])[-1]
        # Only track a separate evaluation genome when the two differ
        if grading_genome != data["sam_ref"]:
            eval_genome = data["sam_ref"]
        else:
            eval_genome = None
    else:
        eval_genome = None
        grading_genome = data["sam_ref"]

    ref_call = {"file": rm_file, "name": "ref", "type": "grading-ref",
                "preclean": True, "prep": True, "remove-refcalls": True}
    if rm_interval_file:
        ref_call["intervals"] = rm_interval_file

    eval_call = {"file": vrn_file, "name": "eval", "remove-refcalls": True}
    if eval_genome:
        # NOTE(review): layout was rebuilt from a single collapsed line; the
        # preclean/prep flags are assumed to apply only with an eval genome.
        eval_call["ref"] = eval_genome
        eval_call["preclean"] = True
        eval_call["prep"] = True

    exp = {"sample": data["name"][-1],
           "ref": grading_genome,
           "approach": "grade",
           "calls": [ref_call, eval_call]}
    callable_bam = data.get("callable_bam")
    if callable_bam:
        exp["align"] = data["callable_bam"]
    intervals = ensemble.get_analysis_intervals(data)
    if intervals:
        exp["intervals"] = os.path.abspath(intervals)

    dirs = {"base": base_dir, "out": "work", "prep": "work/prep"}
    return {"dir": dirs, "experiments": [exp]}