Beispiel #1
0
def add_reference_resources(data):
    """Attach genome reference details to a sample dictionary.

    Sets the ``reference``, ``sam_ref`` and ``genome_resources`` keys on
    ``data``, stores the result of ``effects.get_snpeff_files`` under
    ``data["reference"]["snpeff"]`` and, when the algorithm configuration
    names a ``validate_genome_build``, records the ``fasta`` references of
    that build under ``data["reference"]["alt"]``.

    Returns the same ``data`` dictionary, updated in place.
    """
    algorithm = data["config"]["algorithm"]
    aligner = algorithm.get("aligner", None)
    references = genome.get_refs(data["genome_build"], aligner,
                                 data["dirs"]["galaxy"])
    data["reference"] = references

    # `sam_ref` is kept for backwards compatibility with older consumers.
    fasta_base = utils.get_in(data, ("reference", "fasta", "base"))
    data["sam_ref"] = fasta_base

    # A configured species directory wins over the fasta base location.
    resource_dir = utils.get_in(
        data, ("config", "resources", "species", "dir"), fasta_base)
    data["genome_resources"] = genome.get_resources(data["genome_build"],
                                                    resource_dir)
    data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    alt_build = utils.get_in(
        data, ("config", "algorithm", "validate_genome_build"))
    if alt_build:
        alt_refs = genome.get_refs(alt_build, None, data["dirs"]["galaxy"])
        data["reference"]["alt"] = {alt_build: alt_refs["fasta"]}

    # Re-enable when we have ability to re-define gemini configuration
    # directory; until then the lookup below is never executed.
    gemini_enabled = False
    if gemini_enabled and population.do_db_build(
            [data], check_gemini=False, need_bam=False):
        data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
Beispiel #2
0
def add_reference_resources(data, remote_retriever=None):
    """Attach genome reference details to a sample dictionary.

    When ``remote_retriever`` is truthy, references and resources are
    fetched through it; otherwise they come from the ``genome`` module and
    the references are passed through ``_check_ref_files``.

    Also sets the back-compatible ``sam_ref`` key, adds snpEff files when
    ``effects.get_type`` reports "snpeff" and none are present yet, and runs
    the validation and prioritization target fill-in helpers.

    Returns the updated ``data`` (the remote retriever may return a
    replacement object from ``get_resources``).
    """
    algorithm = data["config"]["algorithm"]
    aligner = algorithm.get("aligner", None)
    if not remote_retriever:
        local_refs = genome.get_refs(data["genome_build"], aligner,
                                     data["dirs"]["galaxy"], data)
        data["reference"] = local_refs
        _check_ref_files(local_refs, data)
    else:
        data["reference"] = remote_retriever.get_refs(
            data["genome_build"], aligner, data["config"])

    # `sam_ref` is kept for backwards compatibility with older consumers.
    fasta_base = utils.get_in(data, ("reference", "fasta", "base"))
    data["sam_ref"] = fasta_base

    # A configured species directory wins over the fasta base location.
    resource_dir = utils.get_in(
        data, ("config", "resources", "species", "dir"), fasta_base)
    if not remote_retriever:
        data["genome_resources"] = genome.get_resources(
            data["genome_build"], resource_dir, data)
    else:
        data = remote_retriever.get_resources(
            data["genome_build"], resource_dir, data)

    wants_snpeff = effects.get_type(data) == "snpeff"
    if wants_snpeff and "snpeff" not in data["reference"]:
        data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    data = _fill_prioritization_targets(_fill_validation_targets(data))

    # Re-enable when we have ability to re-define gemini configuration
    # directory; until then the lookup below is never executed.
    gemini_enabled = False
    if gemini_enabled and population.do_db_build([data], need_bam=False):
        data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
Beispiel #3
0
def add_reference_resources(data, remote_retriever=None):
    """Populate ``data`` with genome reference files and resources.

    With a truthy ``remote_retriever`` both the references and the resources
    are obtained from it; without one they are loaded via the ``genome``
    module and the references are handed to ``_check_ref_files``.

    The function additionally mirrors the fasta base into ``sam_ref``,
    attaches snpEff files when the effects type is "snpeff" and they are not
    already part of the references, and applies the validation and
    prioritization target helpers before returning ``data``.
    """
    use_remote = bool(remote_retriever)
    aligner = data["config"]["algorithm"].get("aligner", None)

    if use_remote:
        refs = remote_retriever.get_refs(data["genome_build"], aligner,
                                         data["config"])
        data["reference"] = refs
    else:
        refs = genome.get_refs(data["genome_build"], aligner,
                               data["dirs"]["galaxy"], data)
        data["reference"] = refs
        _check_ref_files(refs, data)

    # Older code paths still read the fasta base from `sam_ref`.
    base_fasta = utils.get_in(data, ("reference", "fasta", "base"))
    data["sam_ref"] = base_fasta

    # Fall back to the fasta base when no species directory is configured.
    species_dir_keys = ("config", "resources", "species", "dir")
    ref_loc = utils.get_in(data, species_dir_keys, base_fasta)

    if use_remote:
        data = remote_retriever.get_resources(data["genome_build"], ref_loc,
                                              data)
    else:
        resources = genome.get_resources(data["genome_build"], ref_loc, data)
        data["genome_resources"] = resources

    if effects.get_type(data) == "snpeff":
        if "snpeff" not in data["reference"]:
            data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    data = _fill_validation_targets(data)
    data = _fill_prioritization_targets(data)

    # Re-enable when we have ability to re-define gemini configuration
    # directory; the flag below keeps this branch switched off.
    build_gemini = False
    if build_gemini and population.do_db_build([data], need_bam=False):
        data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
Beispiel #4
0
def _create_validate_config(vrn_file, rm_file, rm_interval_file, rm_genome,
                            base_dir, data):
    """Build the bcbio.variation configuration used for validation.

    The result is a dictionary with a ``dir`` section (base/out/prep
    directories) and a single-entry ``experiments`` list that grades the
    evaluation calls in ``vrn_file`` against the reference calls in
    ``rm_file``.

    When ``rm_genome`` resolves to a reference different from
    ``data["sam_ref"]``, the evaluation call carries its own ``ref`` and
    analysis intervals; otherwise intervals and the callable BAM are set on
    the experiment itself.
    """
    # Only set when the reference material uses a different genome.
    eval_genome = None
    if rm_genome:
        rm_genome = genome.get_refs(rm_genome, None,
                                    data["dirs"]["galaxy"])[-1]
        if rm_genome != data["sam_ref"]:
            eval_genome = data["sam_ref"]
    else:
        rm_genome = data["sam_ref"]

    reference_call = {
        "file": str(rm_file),
        "name": "ref",
        "type": "grading-ref",
        "preclean": True,
        "prep": True,
        "remove-refcalls": True,
    }
    analysis_intervals = get_analysis_intervals(data)
    if rm_interval_file:
        reference_call["intervals"] = rm_interval_file

    evaluation_call = {
        "file": vrn_file,
        "name": "eval",
        "remove-refcalls": True,
    }
    if eval_genome:
        evaluation_call.update(
            {"ref": eval_genome, "preclean": True, "prep": True})
        if analysis_intervals:
            evaluation_call["intervals"] = os.path.abspath(analysis_intervals)

    experiment = {
        "sample": data["name"][-1],
        "ref": rm_genome,
        "approach": "grade",
        "calls": [reference_call, evaluation_call],
    }
    if not eval_genome:
        # Same genome on both sides: restrict and align at experiment level.
        if analysis_intervals:
            experiment["intervals"] = os.path.abspath(analysis_intervals)
        if data.get("callable_bam"):
            experiment["align"] = data["callable_bam"]

    directories = {"base": base_dir, "out": "work", "prep": "work/prep"}
    return {"dir": directories, "experiments": [experiment]}
Beispiel #5
0
def _add_reference_resources(data):
    """Attach alignment/SAM references and genome resources to ``data``.

    ``genome.get_refs`` is expected to yield a two-item result, stored as
    ``align_ref`` and ``sam_ref``; ``genome_resources`` is then looked up
    for the genome build using the SAM reference.

    Returns the same ``data`` dictionary, updated in place.
    """
    algorithm = data["config"]["algorithm"]
    refs = genome.get_refs(data["genome_build"],
                           algorithm.get("aligner", None),
                           data["dirs"]["galaxy"])
    data["align_ref"], data["sam_ref"] = refs
    data["genome_resources"] = genome.get_resources(data["genome_build"],
                                                    data["sam_ref"])
    return data
Beispiel #6
0
def add_reference_resources(data):
    """Populate ``data`` with genome reference files and resources.

    Writes the ``reference``, ``sam_ref`` and ``genome_resources`` keys,
    adds the output of ``effects.get_snpeff_files`` as
    ``data["reference"]["snpeff"]`` and, if ``validate_genome_build`` is set
    in the algorithm configuration, stores that build's ``fasta`` references
    under ``data["reference"]["alt"]`` keyed by the build name.

    Returns the same ``data`` dictionary, updated in place.
    """
    galaxy_key = ("dirs", "galaxy")
    aligner = data["config"]["algorithm"].get("aligner", None)
    data["reference"] = genome.get_refs(
        data["genome_build"], aligner, data[galaxy_key[0]][galaxy_key[1]])

    # Older code paths still read the fasta base from `sam_ref`.
    base_fasta = utils.get_in(data, ("reference", "fasta", "base"))
    data["sam_ref"] = base_fasta

    # Fall back to the fasta base when no species directory is configured.
    ref_loc = utils.get_in(data, ("config", "resources", "species", "dir"),
                           base_fasta)
    resources = genome.get_resources(data["genome_build"], ref_loc)
    data["genome_resources"] = resources
    data["reference"]["snpeff"] = effects.get_snpeff_files(data)

    validate_build = utils.get_in(
        data, ("config", "algorithm", "validate_genome_build"))
    if validate_build:
        validate_refs = genome.get_refs(validate_build, None,
                                        data["dirs"]["galaxy"])
        data["reference"]["alt"] = {validate_build: validate_refs["fasta"]}

    # Re-enable when we have ability to re-define gemini configuration
    # directory; the flag below keeps this branch switched off.
    build_gemini = False
    if build_gemini and population.do_db_build(
            [data], check_gemini=False, need_bam=False):
        data["reference"]["gemini"] = population.get_gemini_files(data)
    return data
Beispiel #7
0
def _add_reference_resources(data):
    """Record reference locations and genome resources on ``data``.

    The pair returned by ``genome.get_refs`` is stored under ``align_ref``
    and ``sam_ref``; the SAM reference is then used to look up
    ``genome_resources`` for the sample's genome build.

    Returns the same ``data`` dictionary, updated in place.
    """
    aligner = data["config"]["algorithm"].get("aligner", None)
    reference_pair = genome.get_refs(
        data["genome_build"], aligner, data["dirs"]["galaxy"])
    align_ref, sam_ref = reference_pair
    data.update({"align_ref": align_ref, "sam_ref": sam_ref})
    resources = genome.get_resources(data["genome_build"], sam_ref)
    data["genome_resources"] = resources
    return data
Beispiel #8
0
def get_genome_dir(gid, galaxy_dir, data):
    """Return the standard directory for genome ``gid``, or None.

    With a ``galaxy_dir``, the directory is two levels above the base fasta
    file reported by ``genome.get_refs``, provided that file exists.
    Without one, the data directory is searched for ``genomes/*/<gid>`` and
    the match is returned only when it is unique and exists.
    """
    if not galaxy_dir:
        pattern = os.path.join(_get_data_dir(), "genomes", "*", gid)
        candidates = glob.glob(pattern)
        if len(candidates) == 1 and os.path.exists(candidates[0]):
            return candidates[0]
        return None

    references = genome.get_refs(gid, None, galaxy_dir, data)
    fasta_path = tz.get_in(["fasta", "base"], references)
    if fasta_path and os.path.exists(fasta_path):
        # <genome dir>/<subdir>/<fasta>: step up past the containing subdir.
        return os.path.dirname(os.path.dirname(fasta_path))
    return None
Beispiel #9
0
def get_genome_dir(gid, galaxy_dir, data):
    """Locate the standard genome directory for ``gid``.

    If ``galaxy_dir`` is given, the base fasta path from ``genome.get_refs``
    is used and its grandparent directory is returned when the file exists.
    Otherwise ``genomes/*/<gid>`` is globbed under the data directory and
    the single existing match, if any, is returned. Returns None when no
    directory can be determined.
    """
    found = None
    if galaxy_dir:
        refs = genome.get_refs(gid, None, galaxy_dir, data)
        base_fasta = tz.get_in(["fasta", "base"], refs)
        if base_fasta and os.path.exists(base_fasta):
            containing_dir = os.path.dirname(base_fasta)
            found = os.path.dirname(containing_dir)
    else:
        genome_glob = os.path.join(_get_data_dir(), "genomes", "*", gid)
        matches = glob.glob(genome_glob)
        # Ambiguous (multiple) or missing matches yield no result.
        if len(matches) == 1 and os.path.exists(matches[0]):
            found = matches[0]
    return found
Beispiel #10
0
def _create_validate_config(vrn_file, rm_file, rm_interval_file, rm_genome,
                            base_dir, data):
    """Build the bcbio.variation configuration used for validation.

    Produces a dictionary with a ``dir`` section and one grading experiment
    comparing the evaluation calls in ``vrn_file`` with the reference calls
    in ``rm_file``. If ``rm_genome`` resolves to a reference other than
    ``data["sam_ref"]``, the evaluation call is tagged with its own ``ref``
    and marked for pre-cleaning and preparation.
    """
    # Only set when the reference material uses a different genome.
    eval_genome = None
    if not rm_genome:
        rm_genome = data["sam_ref"]
    else:
        rm_genome = genome.get_refs(rm_genome, None,
                                    data["dirs"]["galaxy"])[-1]
        if rm_genome != data["sam_ref"]:
            eval_genome = data["sam_ref"]

    reference_call = {"file": rm_file, "name": "ref", "type": "grading-ref",
                      "preclean": True, "prep": True,
                      "remove-refcalls": True}
    if rm_interval_file:
        reference_call["intervals"] = rm_interval_file

    evaluation_call = {"file": vrn_file, "name": "eval",
                       "remove-refcalls": True}
    if eval_genome:
        evaluation_call.update(
            {"ref": eval_genome, "preclean": True, "prep": True})

    experiment = {"sample": data["name"][-1],
                  "ref": rm_genome,
                  "approach": "grade",
                  "calls": [reference_call, evaluation_call]}
    if data.get("callable_bam"):
        experiment["align"] = data["callable_bam"]
    analysis_intervals = ensemble.get_analysis_intervals(data)
    if analysis_intervals:
        experiment["intervals"] = os.path.abspath(analysis_intervals)

    directories = {"base": base_dir, "out": "work", "prep": "work/prep"}
    return {"dir": directories, "experiments": [experiment]}